{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Twin Delayed Deep Deterministic Policy Gradient (TD3)"
]
},
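{
"cell_type": "markdown",
"metadata": {},
"source": [
"TD3 (Fujimoto et al., 2018) stabilises DDPG with three additions, all of which appear in the agent implemented below: *clipped double-Q learning* (use the minimum of two critics when forming the bootstrap target), *delayed policy updates* (update the actor and target networks only every `policy_freq` critic updates), and *target policy smoothing* (add clipped Gaussian noise to the target action). The critic target computed in `TD3.train` is\n",
"\n",
"$$y = r + \\gamma (1 - d)\\, \\min_{i=1,2} Q_{\\theta_i'}\\big(s',\\ \\mathrm{clip}(\\mu_{\\phi'}(s') + \\epsilon,\\ -a_{max},\\ a_{max})\\big), \\qquad \\epsilon \\sim \\mathrm{clip}(\\mathcal{N}(0, \\sigma),\\ -c,\\ c)$$\n",
"\n",
"where $\\sigma$ is `policy_noise`, $c$ is `noise_clip` and $d$ is the done flag."
]
},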
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Imports"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"from torch.autograd import Variable\n",
"import torch.nn.functional as F\n",
"from tensorboardX import SummaryWriter\n",
"\n",
"import gym\n",
"import roboschool\n",
"import sys"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Networks"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def hidden_init(layer):\n",
" fan_in = layer.weight.data.size()[0]\n",
" lim = 1. / np.sqrt(fan_in)\n",
" return (-lim, lim)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"class Actor(nn.Module):\n",
" \"\"\"Initialize parameters and build model.\n",
" Args:\n",
" state_size (int): Dimension of each state\n",
" action_size (int): Dimension of each action\n",
" max_action (float): highest action to take\n",
" seed (int): Random seed\n",
" h1_units (int): Number of nodes in first hidden layer\n",
" h2_units (int): Number of nodes in second hidden layer\n",
" \n",
" Return:\n",
" action output of network with tanh activation\n",
" \"\"\"\n",
" \n",
" def __init__(self, state_dim, action_dim, max_action):\n",
" super(Actor, self).__init__()\n",
"\n",
" self.l1 = nn.Linear(state_dim, 400)\n",
" self.l2 = nn.Linear(400, 300)\n",
" self.l3 = nn.Linear(300, action_dim)\n",
"\n",
" self.max_action = max_action\n",
"\n",
"\n",
" def forward(self, x):\n",
" x = F.relu(self.l1(x))\n",
" x = F.relu(self.l2(x))\n",
" x = self.max_action * torch.tanh(self.l3(x)) \n",
" return x\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"class Critic(nn.Module):\n",
" \"\"\"Initialize parameters and build model.\n",
" Args:\n",
" state_size (int): Dimension of each state\n",
" action_size (int): Dimension of each action\n",
" max_action (float): highest action to take\n",
" seed (int): Random seed\n",
" h1_units (int): Number of nodes in first hidden layer\n",
" h2_units (int): Number of nodes in second hidden layer\n",
" \n",
" Return:\n",
" value output of network \n",
" \"\"\"\n",
" \n",
" def __init__(self, state_dim, action_dim):\n",
" super(Critic, self).__init__()\n",
"\n",
" # Q1 architecture\n",
" self.l1 = nn.Linear(state_dim + action_dim, 400)\n",
" self.l2 = nn.Linear(400, 300)\n",
" self.l3 = nn.Linear(300, 1)\n",
"\n",
" # Q2 architecture\n",
" self.l4 = nn.Linear(state_dim + action_dim, 400)\n",
" self.l5 = nn.Linear(400, 300)\n",
" self.l6 = nn.Linear(300, 1)\n",
"\n",
"\n",
" def forward(self, x, u):\n",
" xu = torch.cat([x, u], 1)\n",
"\n",
" x1 = F.relu(self.l1(xu))\n",
" x1 = F.relu(self.l2(x1))\n",
" x1 = self.l3(x1)\n",
"\n",
" x2 = F.relu(self.l4(xu))\n",
" x2 = F.relu(self.l5(x2))\n",
" x2 = self.l6(x2)\n",
" return x1, x2\n",
"\n",
"\n",
" def Q1(self, x, u):\n",
" xu = torch.cat([x, u], 1)\n",
"\n",
" x1 = F.relu(self.l1(xu))\n",
" x1 = F.relu(self.l2(x1))\n",
" x1 = self.l3(x1)\n",
" return x1"
]
},
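{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `Critic` bundles two independent Q-networks so both can be evaluated in a single forward pass; taking the minimum of the two estimates when forming the target counters the overestimation bias of a single critic. The separate `Q1` method exists because the actor update deliberately uses only the first critic's estimate."
]
},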
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Memory"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Code based on: \n",
"# https://github.com/openai/baselines/blob/master/baselines/deepq/replay_buffer.py\n",
"\n",
"# Expects tuples of (state, next_state, action, reward, done)\n",
"class ReplayBuffer(object):\n",
" \"\"\"Buffer to store tuples of experience replay\"\"\"\n",
" \n",
" def __init__(self, max_size=1000000):\n",
" \"\"\"\n",
" Args:\n",
" max_size (int): total amount of tuples to store\n",
" \"\"\"\n",
" \n",
" self.storage = []\n",
" self.max_size = max_size\n",
" self.ptr = 0\n",
"\n",
" def add(self, data):\n",
" \"\"\"Add experience tuples to buffer\n",
" \n",
" Args:\n",
" data (tuple): experience replay tuple\n",
" \"\"\"\n",
" \n",
" if len(self.storage) == self.max_size:\n",
" self.storage[int(self.ptr)] = data\n",
" self.ptr = (self.ptr + 1) % self.max_size\n",
" else:\n",
" self.storage.append(data)\n",
"\n",
" def sample(self, batch_size):\n",
" \"\"\"Samples a random amount of experiences from buffer of batch size\n",
" \n",
" Args:\n",
" batch_size (int): size of sample\n",
" \"\"\"\n",
" \n",
" ind = np.random.randint(0, len(self.storage), size=batch_size)\n",
" states, actions, next_states, rewards, dones = [], [], [], [], []\n",
"\n",
" for i in ind: \n",
" s, a, s_, r, d = self.storage[i]\n",
" states.append(np.array(s, copy=False))\n",
" actions.append(np.array(a, copy=False))\n",
" next_states.append(np.array(s_, copy=False))\n",
" rewards.append(np.array(r, copy=False))\n",
" dones.append(np.array(d, copy=False))\n",
"\n",
" return np.array(states), np.array(actions), np.array(next_states), np.array(rewards).reshape(-1, 1), np.array(dones).reshape(-1, 1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Agent"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"class TD3(object):\n",
" \"\"\"Agent class that handles the training of the networks and provides outputs as actions\n",
" \n",
" Args:\n",
" state_dim (int): state size\n",
" action_dim (int): action size\n",
" max_action (float): highest action to take\n",
" device (device): cuda or cpu to process tensors\n",
" env (env): gym environment to use\n",
" \n",
" \"\"\"\n",
" \n",
" def __init__(self, state_dim, action_dim, max_action, env):\n",
" self.actor = Actor(state_dim, action_dim, max_action).to(device)\n",
" self.actor_target = Actor(state_dim, action_dim, max_action).to(device)\n",
" self.actor_target.load_state_dict(self.actor.state_dict())\n",
" self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=1e-3)\n",
"\n",
" self.critic = Critic(state_dim, action_dim).to(device)\n",
" self.critic_target = Critic(state_dim, action_dim).to(device)\n",
" self.critic_target.load_state_dict(self.critic.state_dict())\n",
" self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr=1e-3)\n",
"\n",
" self.max_action = max_action\n",
" self.env = env\n",
"\n",
"\n",
" \n",
" def select_action(self, state, noise=0.1):\n",
" \"\"\"Select an appropriate action from the agent policy\n",
" \n",
" Args:\n",
" state (array): current state of environment\n",
" noise (float): how much noise to add to acitons\n",
" \n",
" Returns:\n",
" action (float): action clipped within action range\n",
" \n",
" \"\"\"\n",
" \n",
" state = torch.FloatTensor(state.reshape(1, -1)).to(device)\n",
" \n",
" action = self.actor(state).cpu().data.numpy().flatten()\n",
" if noise != 0: \n",
" action = (action + np.random.normal(0, noise, size=self.env.action_space.shape[0]))\n",
" \n",
" return action.clip(self.env.action_space.low, self.env.action_space.high)\n",
"\n",
" \n",
" def train(self, replay_buffer, iterations, batch_size=100, discount=0.99, tau=0.005, policy_noise=0.2, noise_clip=0.5, policy_freq=2):\n",
" \"\"\"Train and update actor and critic networks\n",
" \n",
" Args:\n",
" replay_buffer (ReplayBuffer): buffer for experience replay\n",
" iterations (int): how many times to run training\n",
" batch_size(int): batch size to sample from replay buffer\n",
" discount (float): discount factor\n",
" tau (float): soft update for main networks to target networks\n",
" \n",
" Return:\n",
" actor_loss (float): loss from actor network\n",
" critic_loss (float): loss from critic network\n",
" \n",
" \"\"\"\n",
" \n",
" for it in range(iterations):\n",
"\n",
" # Sample replay buffer \n",
" x, y, u, r, d = replay_buffer.sample(batch_size)\n",
" state = torch.FloatTensor(x).to(device)\n",
" action = torch.FloatTensor(u).to(device)\n",
" next_state = torch.FloatTensor(y).to(device)\n",
" done = torch.FloatTensor(1 - d).to(device)\n",
" reward = torch.FloatTensor(r).to(device)\n",
"\n",
" # Select action according to policy and add clipped noise \n",
" noise = torch.FloatTensor(u).data.normal_(0, policy_noise).to(device)\n",
" noise = noise.clamp(-noise_clip, noise_clip)\n",
" next_action = (self.actor_target(next_state) + noise).clamp(-self.max_action, self.max_action)\n",
"\n",
" # Compute the target Q value\n",
" target_Q1, target_Q2 = self.critic_target(next_state, next_action)\n",
" target_Q = torch.min(target_Q1, target_Q2)\n",
" target_Q = reward + (done * discount * target_Q).detach()\n",
"\n",
" # Get current Q estimates\n",
" current_Q1, current_Q2 = self.critic(state, action)\n",
"\n",
" # Compute critic loss\n",
" critic_loss = F.mse_loss(current_Q1, target_Q) + F.mse_loss(current_Q2, target_Q) \n",
"\n",
" # Optimize the critic\n",
" self.critic_optimizer.zero_grad()\n",
" critic_loss.backward()\n",
" self.critic_optimizer.step()\n",
"\n",
" # Delayed policy updates\n",
" if it % policy_freq == 0:\n",
"\n",
" # Compute actor loss\n",
" actor_loss = -self.critic.Q1(state, self.actor(state)).mean()\n",
"\n",
" # Optimize the actor \n",
" self.actor_optimizer.zero_grad()\n",
" actor_loss.backward()\n",
" self.actor_optimizer.step()\n",
"\n",
" # Update the frozen target models\n",
" for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):\n",
" target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)\n",
"\n",
" for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()):\n",
" target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)\n",
"\n",
"\n",
" def save(self, filename, directory):\n",
" torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, filename))\n",
" torch.save(self.critic.state_dict(), '%s/%s_critic.pth' % (directory, filename))\n",
"\n",
"\n",
" def load(self, filename=\"best_avg\", directory=\"./saves\"):\n",
" self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, filename)))\n",
" self.critic.load_state_dict(torch.load('%s/%s_critic.pth' % (directory, filename)))"
]
},
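{
"cell_type": "markdown",
"metadata": {},
"source": [
"Every `policy_freq` critic updates the actor is updated by maximising $Q_1(s, \\mu_\\phi(s))$, and the target networks track the main networks through a Polyak (soft) update:\n",
"\n",
"$$\\theta' \\leftarrow \\tau \\theta + (1 - \\tau)\\, \\theta', \\qquad \\phi' \\leftarrow \\tau \\phi + (1 - \\tau)\\, \\phi'$$\n",
"\n",
"with $\\tau = 0.005$ by default."
]
},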
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Runner"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"class Runner():\n",
" \"\"\"Carries out the environment steps and adds experiences to memory\"\"\"\n",
" \n",
" def __init__(self, env, agent, replay_buffer):\n",
" \n",
" self.env = env\n",
" self.agent = agent\n",
" self.replay_buffer = replay_buffer\n",
" self.obs = env.reset()\n",
" self.done = False\n",
" \n",
" def next_step(self, episode_timesteps, noise=0.1):\n",
" \n",
" action = self.agent.select_action(np.array(self.obs), noise=0.1)\n",
" \n",
" # Perform action\n",
" new_obs, reward, done, _ = self.env.step(action) \n",
" done_bool = 0 if episode_timesteps + 1 == 200 else float(done)\n",
" \n",
" # Store data in replay buffer\n",
" replay_buffer.add((self.obs, new_obs, action, reward, done_bool))\n",
" \n",
" self.obs = new_obs\n",
" \n",
" if done:\n",
" self.obs = self.env.reset()\n",
" done = False\n",
" \n",
" return reward, True\n",
" \n",
" return reward, done"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Evaluate"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def evaluate_policy(policy, env, eval_episodes=100,render=False):\n",
" \"\"\"run several episodes using the best agent policy\n",
" \n",
" Args:\n",
" policy (agent): agent to evaluate\n",
" env (env): gym environment\n",
" eval_episodes (int): how many test episodes to run\n",
" render (bool): show training\n",
" \n",
" Returns:\n",
" avg_reward (float): average reward over the number of evaluations\n",
" \n",
" \"\"\"\n",
" \n",
" avg_reward = 0.\n",
" for i in range(eval_episodes):\n",
" obs = env.reset()\n",
" done = False\n",
" while not done:\n",
" if render:\n",
" env.render()\n",
" action = policy.select_action(np.array(obs), noise=0)\n",
" obs, reward, done, _ = env.step(action)\n",
" avg_reward += reward\n",
"\n",
" avg_reward /= eval_episodes\n",
"\n",
" print(\"\\n---------------------------------------\")\n",
" print(\"Evaluation over {:d} episodes: {:f}\" .format(eval_episodes, avg_reward))\n",
" print(\"---------------------------------------\")\n",
" return avg_reward"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Observation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def observe(env,replay_buffer, observation_steps):\n",
" \"\"\"run episodes while taking random actions and filling replay_buffer\n",
" \n",
" Args:\n",
" env (env): gym environment\n",
" replay_buffer(ReplayBuffer): buffer to store experience replay\n",
" observation_steps (int): how many steps to observe for\n",
" \n",
" \"\"\"\n",
" \n",
" time_steps = 0\n",
" obs = env.reset()\n",
" done = False\n",
"\n",
" while time_steps < observation_steps:\n",
" action = env.action_space.sample()\n",
" new_obs, reward, done, _ = env.step(action)\n",
"\n",
" replay_buffer.add((obs, new_obs, action, reward, done))\n",
"\n",
" obs = new_obs\n",
" time_steps += 1\n",
"\n",
" if done:\n",
" obs = env.reset()\n",
" done = False\n",
"\n",
" print(\"\\rPopulating Buffer {}/{}.\".format(time_steps, observation_steps), end=\"\")\n",
" sys.stdout.flush()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Train"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def train(agent, test_env):\n",
" \"\"\"Train the agent for exploration steps\n",
" \n",
" Args:\n",
" agent (Agent): agent to use\n",
" env (environment): gym environment\n",
" writer (SummaryWriter): tensorboard writer\n",
" exploration (int): how many training steps to run\n",
" \n",
" \"\"\"\n",
"\n",
" total_timesteps = 0\n",
" timesteps_since_eval = 0\n",
" episode_num = 0\n",
" episode_reward = 0\n",
" episode_timesteps = 0\n",
" done = False \n",
" obs = env.reset()\n",
" evaluations = []\n",
" rewards = []\n",
" best_avg = -2000\n",
" \n",
" writer = SummaryWriter(comment=\"-TD3_Baseline_HalfCheetah\")\n",
" \n",
" while total_timesteps < EXPLORATION:\n",
" \n",
" if done: \n",
"\n",
" if total_timesteps != 0: \n",
" rewards.append(episode_reward)\n",
" avg_reward = np.mean(rewards[-100:])\n",
" \n",
" writer.add_scalar(\"avg_reward\", avg_reward, total_timesteps)\n",
" writer.add_scalar(\"reward_step\", reward, total_timesteps)\n",
" writer.add_scalar(\"episode_reward\", episode_reward, total_timesteps)\n",
" \n",
" if best_avg < avg_reward:\n",
" best_avg = avg_reward\n",
" print(\"saving best model....\\n\")\n",
" agent.save(\"best_avg\",\"saves\")\n",
"\n",
" print(\"\\rTotal T: {:d} Episode Num: {:d} Reward: {:f} Avg Reward: {:f}\".format(\n",
" total_timesteps, episode_num, episode_reward, avg_reward), end=\"\")\n",
" sys.stdout.flush()\n",
"\n",
"\n",
" if avg_reward >= REWARD_THRESH:\n",
" break\n",
"\n",
" agent.train(replay_buffer, episode_timesteps, BATCH_SIZE, GAMMA, TAU, NOISE, NOISE_CLIP, POLICY_FREQUENCY)\n",
"\n",
" # Evaluate episode\n",
"# if timesteps_since_eval >= EVAL_FREQUENCY:\n",
"# timesteps_since_eval %= EVAL_FREQUENCY\n",
"# eval_reward = evaluate_policy(agent, test_env)\n",
"# evaluations.append(avg_reward)\n",
"# writer.add_scalar(\"eval_reward\", eval_reward, total_timesteps)\n",
"\n",
"# if best_avg < eval_reward:\n",
"# best_avg = eval_reward\n",
"# print(\"saving best model....\\n\")\n",
"# agent.save(\"best_avg\",\"saves\")\n",
"\n",
" episode_reward = 0\n",
" episode_timesteps = 0\n",
" episode_num += 1 \n",
"\n",
" reward, done = runner.next_step(episode_timesteps)\n",
" episode_reward += reward\n",
"\n",
" episode_timesteps += 1\n",
" total_timesteps += 1\n",
" timesteps_since_eval += 1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Config"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ENV = \"RoboschoolHalfCheetah-v1\"#\"Pendulum-v0\"\n",
"SEED = 0\n",
"OBSERVATION = 10000\n",
"EXPLORATION = 5000000\n",
"BATCH_SIZE = 100\n",
"GAMMA = 0.99\n",
"TAU = 0.005\n",
"NOISE = 0.2\n",
"NOISE_CLIP = 0.5\n",
"EXPLORE_NOISE = 0.1\n",
"POLICY_FREQUENCY = 2\n",
"EVAL_FREQUENCY = 5000\n",
"REWARD_THRESH = 8000"
]
},
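{
"cell_type": "markdown",
"metadata": {},
"source": [
"`OBSERVATION` is the number of random warm-up steps used to seed the replay buffer before learning starts, and `EXPLORATION` is the total number of environment steps to train for; the remaining values mirror the default TD3 hyperparameters (batch size 100, $\\gamma = 0.99$, $\\tau = 0.005$, target policy noise 0.2 clipped at 0.5, exploration noise 0.1, policy updated every 2 critic updates)."
]
},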
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Main"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"env = gym.make(ENV)\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"\n",
"# Set seeds\n",
"env.seed(SEED)\n",
"torch.manual_seed(SEED)\n",
"np.random.seed(SEED)\n",
"\n",
"state_dim = env.observation_space.shape[0]\n",
"action_dim = env.action_space.shape[0] \n",
"max_action = float(env.action_space.high[0])\n",
"\n",
"policy = TD3(state_dim, action_dim, max_action, env)\n",
"\n",
"replay_buffer = ReplayBuffer()\n",
"\n",
"runner = Runner(env, policy, replay_buffer)\n",
"\n",
"total_timesteps = 0\n",
"timesteps_since_eval = 0\n",
"episode_num = 0\n",
"done = True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating Buffer 10000/10000."
]
}
],
"source": [
"# Populate replay buffer\n",
"observe(env, replay_buffer, OBSERVATION)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"saving best model....\n",
"\n",
"Total T: 20 Episode Num: 0 Reward: 16.743121 Avg Reward: 16.743121saving best model....\n",
"\n",
"Total T: 1514 Episode Num: 58 Reward: 21.769903 Avg Reward: 20.342496saving best model....\n",
"\n",
"Total T: 1536 Episode Num: 59 Reward: 22.781277 Avg Reward: 20.383142saving best model....\n",
"\n",
"Total T: 1582 Episode Num: 61 Reward: 17.289137 Avg Reward: 20.385368saving best model....\n",
"\n",
"Total T: 1630 Episode Num: 63 Reward: 19.862503 Avg Reward: 20.485710saving best model....\n",
"\n",
"Total T: 1650 Episode Num: 64 Reward: 21.866313 Avg Reward: 20.506950saving best model....\n",
"\n",
"Total T: 1675 Episode Num: 65 Reward: 26.819443 Avg Reward: 20.602593saving best model....\n",
"\n",
"Total T: 1700 Episode Num: 66 Reward: 23.843442 Avg Reward: 20.650964saving best model....\n",
"\n",
"Total T: 1726 Episode Num: 67 Reward: 21.144815 Avg Reward: 20.658227saving best model....\n",
"\n",
"Total T: 1750 Episode Num: 68 Reward: 24.633037 Avg Reward: 20.715833saving best model....\n",
"\n",
"Total T: 1773 Episode Num: 69 Reward: 21.303502 Avg Reward: 20.724228saving best model....\n",
"\n",
"Total T: 1796 Episode Num: 70 Reward: 22.716227 Avg Reward: 20.752284saving best model....\n",
"\n",
"Total T: 1819 Episode Num: 71 Reward: 20.989999 Avg Reward: 20.755586saving best model....\n",
"\n",
"Total T: 1843 Episode Num: 72 Reward: 23.784038 Avg Reward: 20.797072saving best model....\n",
"\n",
"Total T: 1865 Episode Num: 73 Reward: 23.528150 Avg Reward: 20.833978saving best model....\n",
"\n",
"Total T: 1909 Episode Num: 75 Reward: 19.643384 Avg Reward: 20.848522saving best model....\n",
"\n",
"Total T: 1931 Episode Num: 76 Reward: 24.425013 Avg Reward: 20.894970saving best model....\n",
"\n",
"Total T: 1996 Episode Num: 79 Reward: 23.952797 Avg Reward: 20.882782saving best model....\n",
"\n",
"Total T: 2106 Episode Num: 84 Reward: 24.645263 Avg Reward: 20.923955saving best model....\n",
"\n",
"Total T: 2129 Episode Num: 85 Reward: 23.699468 Avg Reward: 20.956229saving best model....\n",
"\n",
"Total T: 2156 Episode Num: 86 Reward: 26.974068 Avg Reward: 21.025399saving best model....\n",
"\n",
"Total T: 2180 Episode Num: 87 Reward: 25.211616 Avg Reward: 21.072970saving best model....\n",
"\n",
"Total T: 2204 Episode Num: 88 Reward: 23.616154 Avg Reward: 21.101545saving best model....\n",
"\n",
"Total T: 2232 Episode Num: 89 Reward: 22.625034 Avg Reward: 21.118473saving best model....\n",
"\n",
"Total T: 2258 Episode Num: 90 Reward: 25.570383 Avg Reward: 21.167395saving best model....\n",
"\n",
"Total T: 2365 Episode Num: 95 Reward: 20.145823 Avg Reward: 21.152679saving best model....\n",
"\n",
"Total T: 2387 Episode Num: 96 Reward: 24.185485 Avg Reward: 21.183945saving best model....\n",
"\n",
"Total T: 2408 Episode Num: 97 Reward: 22.592442 Avg Reward: 21.198317saving best model....\n",
"\n",
"Total T: 2430 Episode Num: 98 Reward: 21.265422 Avg Reward: 21.198995saving best model....\n",
"\n",
"Total T: 2451 Episode Num: 99 Reward: 22.979675 Avg Reward: 21.216802saving best model....\n",
"\n",
"Total T: 2493 Episode Num: 101 Reward: 21.581048 Avg Reward: 21.237202saving best model....\n",
"\n",
"Total T: 2516 Episode Num: 102 Reward: 18.065544 Avg Reward: 21.451748saving best model....\n",
"\n",
"Total T: 2549 Episode Num: 103 Reward: 23.354547 Avg Reward: 21.635259saving best model....\n",
"\n",
"Total T: 2568 Episode Num: 104 Reward: 18.699222 Avg Reward: 21.684347saving best model....\n",
"\n",
"Total T: 2590 Episode Num: 105 Reward: 23.184260 Avg Reward: 21.863509saving best model....\n",
"\n",
"Total T: 2607 Episode Num: 106 Reward: 17.421441 Avg Reward: 21.929896saving best model....\n",
"\n",
"Total T: 2628 Episode Num: 107 Reward: 22.084720 Avg Reward: 22.046856saving best model....\n",
"\n",
"Total T: 2673 Episode Num: 109 Reward: 24.765999 Avg Reward: 22.084884saving best model....\n",
"\n",
"Total T: 2693 Episode Num: 110 Reward: 20.889029 Avg Reward: 22.143954saving best model....\n",
"\n",
"Total T: 2737 Episode Num: 112 Reward: 18.132201 Avg Reward: 22.184750saving best model....\n",
"\n",
"Total T: 2779 Episode Num: 114 Reward: 20.979700 Avg Reward: 22.232221saving best model....\n",
"\n",
"Total T: 2800 Episode Num: 115 Reward: 23.987061 Avg Reward: 22.312318saving best model....\n",
"\n",
"Total T: 2822 Episode Num: 116 Reward: 22.098923 Avg Reward: 22.398968saving best model....\n",
"\n",
"Total T: 2844 Episode Num: 117 Reward: 20.284459 Avg Reward: 22.443698saving best model....\n",
"\n",
"Total T: 2865 Episode Num: 118 Reward: 23.519466 Avg Reward: 22.536493saving best model....\n",
"\n",
"Total T: 8075 Episode Num: 337 Reward: 22.603253 Avg Reward: 22.596228saving best model....\n",
"\n",
"Total T: 8102 Episode Num: 338 Reward: 24.827601 Avg Reward: 22.665031saving best model....\n",
"\n",
"Total T: 8125 Episode Num: 339 Reward: 20.482397 Avg Reward: 22.749411saving best model....\n",
"\n",
"Total T: 8154 Episode Num: 340 Reward: 29.937905 Avg Reward: 22.876361saving best model....\n",
"\n",
"Total T: 8178 Episode Num: 341 Reward: 21.904997 Avg Reward: 22.932600saving best model....\n",
"\n",
"Total T: 8205 Episode Num: 342 Reward: 30.164619 Avg Reward: 23.048934saving best model....\n",
"\n",
"Total T: 8231 Episode Num: 343 Reward: 22.462488 Avg Reward: 23.131638saving best model....\n",
"\n",
"Total T: 8269 Episode Num: 344 Reward: 38.728877 Avg Reward: 23.355593saving best model....\n",
"\n",
"Total T: 8323 Episode Num: 345 Reward: 49.055836 Avg Reward: 23.680787saving best model....\n",
"\n",
"Total T: 8353 Episode Num: 346 Reward: 29.906956 Avg Reward: 23.817346saving best model....\n",
"\n",
"Total T: 8400 Episode Num: 347 Reward: 42.409426 Avg Reward: 24.036897saving best model....\n",
"\n",
"Total T: 8422 Episode Num: 348 Reward: 17.867632 Avg Reward: 24.079333saving best model....\n",
"\n",
"Total T: 8444 Episode Num: 349 Reward: 17.905481 Avg Reward: 24.098456saving best model....\n",
"\n",
"Total T: 8473 Episode Num: 350 Reward: 27.287787 Avg Reward: 24.234559saving best model....\n",
"\n",
"Total T: 8535 Episode Num: 351 Reward: 38.570854 Avg Reward: 24.444101saving best model....\n",
"\n",
"Total T: 8579 Episode Num: 352 Reward: 33.785010 Avg Reward: 24.617698saving best model....\n",
"\n",
"Total T: 8608 Episode Num: 353 Reward: 29.897824 Avg Reward: 24.753943saving best model....\n",
"\n",
"Total T: 8641 Episode Num: 354 Reward: 36.745220 Avg Reward: 24.953284saving best model....\n",
"\n",
"Total T: 8689 Episode Num: 356 Reward: 16.709570 Avg Reward: 25.022410saving best model....\n",
"\n",
"Total T: 8728 Episode Num: 357 Reward: 42.953937 Avg Reward: 25.276442saving best model....\n",
"\n",
"Total T: 8753 Episode Num: 358 Reward: 20.088823 Avg Reward: 25.306779saving best model....\n",
"\n",
"Total T: 8780 Episode Num: 359 Reward: 22.650081 Avg Reward: 25.323501saving best model....\n",
"\n",
"Total T: 8808 Episode Num: 360 Reward: 27.804353 Avg Reward: 25.459005saving best model....\n",
"\n",
"Total T: 8841 Episode Num: 361 Reward: 40.678614 Avg Reward: 25.564849saving best model....\n",
"\n",
"Total T: 8870 Episode Num: 362 Reward: 34.146334 Avg Reward: 25.747527saving best model....\n",
"\n",
"Total T: 8899 Episode Num: 363 Reward: 22.897403 Avg Reward: 25.807958saving best model....\n",
"\n",
"Total T: 8935 Episode Num: 364 Reward: 27.907736 Avg Reward: 25.837514saving best model....\n",
"\n",
"Total T: 9021 Episode Num: 365 Reward: 43.411862 Avg Reward: 26.118753saving best model....\n",
"\n",
"Total T: 9060 Episode Num: 366 Reward: 43.043610 Avg Reward: 26.442973saving best model....\n",
"\n",
"Total T: 9110 Episode Num: 368 Reward: 21.099034 Avg Reward: 26.331570saving best model....\n",
"\n",
"Total T: 9153 Episode Num: 369 Reward: 44.194996 Avg Reward: 26.624449saving best model....\n",
"\n",
"Total T: 9193 Episode Num: 370 Reward: 32.725990 Avg Reward: 26.745011saving best model....\n",
"\n",
"Total T: 9225 Episode Num: 371 Reward: 29.689769 Avg Reward: 26.786023saving best model....\n",
"\n",
"Total T: 9271 Episode Num: 372 Reward: 25.130714 Avg Reward: 26.820340saving best model....\n",
"\n",
"Total T: 9302 Episode Num: 373 Reward: 33.233439 Avg Reward: 26.936862saving best model....\n",
"\n",
"Total T: 9336 Episode Num: 374 Reward: 32.769324 Avg Reward: 27.026555saving best model....\n",
"\n",
"Total T: 9368 Episode Num: 375 Reward: 26.749514 Avg Reward: 27.116073saving best model....\n",
"\n",
"Total T: 9515 Episode Num: 376 Reward: 88.880520 Avg Reward: 27.797828saving best model....\n",
"\n",
"Total T: 9542 Episode Num: 377 Reward: 25.058426 Avg Reward: 27.841152saving best model....\n",
"\n",
"Total T: 9655 Episode Num: 378 Reward: 86.434673 Avg Reward: 28.506462saving best model....\n",
"\n",
"Total T: 9682 Episode Num: 379 Reward: 21.743220 Avg Reward: 28.511879saving best model....\n",
"\n",
"Total T: 9734 Episode Num: 380 Reward: 48.613742 Avg Reward: 28.727580saving best model....\n",
"\n",
"Total T: 9806 Episode Num: 381 Reward: 28.639700 Avg Reward: 28.733419saving best model....\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total T: 10769 Episode Num: 382 Reward: 539.815619 Avg Reward: 33.930442saving best model....\n",
"\n",
"Total T: 11769 Episode Num: 383 Reward: 566.808983 Avg Reward: 39.388113saving best model....\n",
"\n",
"Total T: 11853 Episode Num: 384 Reward: 67.279419 Avg Reward: 39.876748saving best model....\n",
"\n",
"Total T: 11957 Episode Num: 385 Reward: 71.503203 Avg Reward: 40.310240saving best model....\n",
"\n",
"Total T: 12046 Episode Num: 386 Reward: 77.520739 Avg Reward: 40.875106saving best model....\n",
"\n",
"Total T: 12429 Episode Num: 387 Reward: 246.536070 Avg Reward: 43.146199saving best model....\n",
"\n",
"Total T: 13429 Episode Num: 388 Reward: 576.103617 Avg Reward: 48.686063saving best model....\n",
"\n",
"Total T: 13577 Episode Num: 391 Reward: 21.495391 Avg Reward: 48.472590saving best model....\n",
"\n",
"Total T: 13650 Episode Num: 392 Reward: 54.999420 Avg Reward: 48.824773saving best model....\n",
"\n",
"Total T: 14206 Episode Num: 393 Reward: 205.837476 Avg Reward: 50.679812saving best model....\n",
"\n",
"Total T: 14278 Episode Num: 394 Reward: 65.735856 Avg Reward: 51.097288saving best model....\n",
"\n",
"Total T: 14353 Episode Num: 395 Reward: 61.474123 Avg Reward: 51.429518saving best model....\n",
"\n",
"Total T: 14408 Episode Num: 396 Reward: 52.433851 Avg Reward: 51.622611saving best model....\n",
"\n",
"Total T: 14460 Episode Num: 397 Reward: 45.060072 Avg Reward: 51.838137saving best model....\n",
"\n",
"Total T: 14549 Episode Num: 398 Reward: 72.634526 Avg Reward: 52.288803saving best model....\n",
"\n",
"Total T: 14630 Episode Num: 399 Reward: 76.795412 Avg Reward: 52.838007saving best model....\n",
"\n",
"Total T: 14690 Episode Num: 400 Reward: 62.358343 Avg Reward: 53.129828saving best model....\n",
"\n",
"Total T: 14767 Episode Num: 401 Reward: 62.494705 Avg Reward: 53.537558saving best model....\n",
"\n",
"Total T: 15825 Episode Num: 403 Reward: 9.949623 Avg Reward: 53.7445628saving best model....\n",
"\n",
"Total T: 15975 Episode Num: 404 Reward: 93.993168 Avg Reward: 54.462463saving best model....\n",
"\n",
"Total T: 16053 Episode Num: 405 Reward: 77.928995 Avg Reward: 54.958081saving best model....\n",
"\n",
"Total T: 16120 Episode Num: 406 Reward: 58.549251 Avg Reward: 55.245463saving best model....\n",
"\n",
"Total T: 16500 Episode Num: 407 Reward: 117.493571 Avg Reward: 55.933688saving best model....\n",
"\n",
"Total T: 16703 Episode Num: 408 Reward: 86.696944 Avg Reward: 56.529788saving best model....\n",
"\n",
"Total T: 16845 Episode Num: 409 Reward: 91.451277 Avg Reward: 57.088445saving best model....\n",
"\n",
"Total T: 17246 Episode Num: 413 Reward: 33.851439 Avg Reward: 57.3138992saving best model....\n",
"\n",
"Total T: 19487 Episode Num: 457 Reward: 57.267046 Avg Reward: 57.9176036saving best model....\n",
"\n",
"Total T: 19537 Episode Num: 458 Reward: 51.000588 Avg Reward: 58.226720saving best model....\n",
"\n",
"Total T: 19585 Episode Num: 459 Reward: 50.714799 Avg Reward: 58.507367saving best model....\n",
"\n",
"Total T: 19623 Episode Num: 460 Reward: 40.609265 Avg Reward: 58.635417saving best model....\n",
"\n",
"Total T: 19722 Episode Num: 461 Reward: 69.569158 Avg Reward: 58.924322saving best model....\n",
"\n",
"Total T: 20143 Episode Num: 462 Reward: 201.738595 Avg Reward: 60.600245saving best model....\n",
"\n",
"Total T: 20196 Episode Num: 463 Reward: 54.189743 Avg Reward: 60.913168saving best model....\n",
"\n",
"Total T: 20256 Episode Num: 464 Reward: 57.838426 Avg Reward: 61.212475saving best model....\n",
"\n",
"Total T: 20407 Episode Num: 465 Reward: 101.113747 Avg Reward: 61.789494saving best model....\n",
"\n",
"Total T: 20466 Episode Num: 466 Reward: 65.217016 Avg Reward: 62.011228saving best model....\n",
"\n",
"Total T: 20550 Episode Num: 467 Reward: 84.866026 Avg Reward: 62.606770saving best model....\n",
"\n",
"Total T: 20643 Episode Num: 468 Reward: 91.502709 Avg Reward: 63.310807saving best model....\n",
"\n",
"Total T: 20744 Episode Num: 469 Reward: 86.479545 Avg Reward: 63.733653saving best model....\n",
"\n",
"Total T: 20822 Episode Num: 470 Reward: 73.225521 Avg Reward: 64.138648saving best model....\n",
"\n",
"Total T: 20898 Episode Num: 471 Reward: 75.831572 Avg Reward: 64.600066saving best model....\n",
"\n",
"Total T: 21136 Episode Num: 472 Reward: 148.298909 Avg Reward: 65.831748saving best model....\n",
"\n",
"Total T: 21246 Episode Num: 473 Reward: 59.654322 Avg Reward: 66.095957saving best model....\n",
"\n",
"Total T: 21366 Episode Num: 474 Reward: 47.413121 Avg Reward: 66.242395saving best model....\n",
"\n",
"Total T: 21555 Episode Num: 476 Reward: 40.351045 Avg Reward: 66.447763saving best model....\n",
"\n",
"Total T: 21694 Episode Num: 477 Reward: 96.448310 Avg Reward: 67.161662saving best model....\n",
"\n",
"Total T: 22016 Episode Num: 478 Reward: 207.061984 Avg Reward: 68.367935saving best model....\n",
"\n",
"Total T: 22231 Episode Num: 479 Reward: 141.467320 Avg Reward: 69.565176saving best model....\n",
"\n",
"Total T: 22365 Episode Num: 480 Reward: 102.487831 Avg Reward: 70.103917saving best model....\n",
"\n",
"Total T: 23637 Episode Num: 484 Reward: 593.477348 Avg Reward: 66.445456saving best model....\n",
"\n",
"Total T: 24637 Episode Num: 485 Reward: 710.934881 Avg Reward: 72.839772saving best model....\n",
"\n",
"Total T: 27672 Episode Num: 517 Reward: 62.596648 Avg Reward: 73.0511290saving best model....\n",
"\n",
"Total T: 27734 Episode Num: 518 Reward: 67.672126 Avg Reward: 73.781579saving best model....\n",
"\n",
"Total T: 27785 Episode Num: 519 Reward: 57.932271 Avg Reward: 74.421600saving best model....\n",
"\n",
"Total T: 27835 Episode Num: 520 Reward: 55.663959 Avg Reward: 74.996262saving best model....\n",
"\n",
"Total T: 27933 Episode Num: 521 Reward: 87.078968 Avg Reward: 75.914216saving best model....\n",
"\n",
"Total T: 27974 Episode Num: 522 Reward: 38.165231 Avg Reward: 76.343365saving best model....\n",
"\n",
"Total T: 28072 Episode Num: 523 Reward: 90.268251 Avg Reward: 77.319059saving best model....\n",
"\n",
"Total T: 28155 Episode Num: 524 Reward: 85.209868 Avg Reward: 78.244842saving best model....\n",
"\n",
"Total T: 28215 Episode Num: 525 Reward: 56.868200 Avg Reward: 78.846508saving best model....\n",
"\n",
"Total T: 28301 Episode Num: 526 Reward: 78.205942 Avg Reward: 79.673087saving best model....\n",
"\n",
"Total T: 28390 Episode Num: 527 Reward: 83.921447 Avg Reward: 80.536428saving best model....\n",
"\n",
"Total T: 28540 Episode Num: 528 Reward: 108.993770 Avg Reward: 81.581168saving best model....\n",
"\n",
"Total T: 28625 Episode Num: 529 Reward: 76.458615 Avg Reward: 82.325346saving best model....\n",
"\n",
"Total T: 28698 Episode Num: 530 Reward: 71.806796 Avg Reward: 82.979184saving best model....\n",
"\n",
"Total T: 28782 Episode Num: 531 Reward: 82.587037 Avg Reward: 83.819181saving best model....\n",
"\n",
"Total T: 28849 Episode Num: 532 Reward: 68.233095 Avg Reward: 84.466645saving best model....\n",
"\n",
"Total T: 28920 Episode Num: 533 Reward: 70.993316 Avg Reward: 85.110843saving best model....\n",
"\n",
"Total T: 29107 Episode Num: 534 Reward: 108.478852 Avg Reward: 86.136762saving best model....\n",
"\n",
"Total T: 29253 Episode Num: 536 Reward: 77.353549 Avg Reward: 86.674760saving best model....\n",
"\n",
"Total T: 29641 Episode Num: 537 Reward: 174.417830 Avg Reward: 88.247766saving best model....\n",
"\n",
"Total T: 29757 Episode Num: 538 Reward: 95.540528 Avg Reward: 88.763552saving best model....\n",
"\n",
"Total T: 30360 Episode Num: 539 Reward: 367.916024 Avg Reward: 92.314258saving best model....\n",
"\n",
"Total T: 30465 Episode Num: 540 Reward: 72.477318 Avg Reward: 92.576066saving best model....\n",
"\n",
"Total T: 30534 Episode Num: 541 Reward: 57.214366 Avg Reward: 93.041586saving best model....\n",
"\n",
"Total T: 30702 Episode Num: 542 Reward: 132.262688 Avg Reward: 94.028824saving best model....\n",
"\n",
"Total T: 30822 Episode Num: 543 Reward: 96.035913 Avg Reward: 94.374318saving best model....\n",
"\n",
"Total T: 32701 Episode Num: 563 Reward: 173.171367 Avg Reward: 92.100857saving best model....\n",
"\n",
"Total T: 34059 Episode Num: 566 Reward: 90.331956 Avg Reward: 94.6459222saving best model....\n",
"\n",
"Total T: 35353 Episode Num: 568 Reward: 58.279942 Avg Reward: 96.7399686saving best model....\n",
"\n",
"Total T: 35695 Episode Num: 569 Reward: 162.680436 Avg Reward: 97.501977saving best model....\n",
"\n",
"Total T: 37328 Episode Num: 572 Reward: 161.161587 Avg Reward: 99.629027saving best model....\n",
"\n",
"Total T: 37720 Episode Num: 573 Reward: 204.134413 Avg Reward: 101.073828saving best model....\n",
"\n",
"Total T: 37932 Episode Num: 574 Reward: 165.014384 Avg Reward: 102.249841saving best model....\n",
"\n",
"Total T: 38323 Episode Num: 575 Reward: 196.480934 Avg Reward: 103.256492saving best model....\n",
"\n",
"Total T: 38478 Episode Num: 576 Reward: 108.871301 Avg Reward: 103.941694saving best model....\n",
"\n",
"Total T: 39478 Episode Num: 577 Reward: 278.518448 Avg Reward: 105.762396saving best model....\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total T: 39806 Episode Num: 578 Reward: 208.145489 Avg Reward: 105.773231saving best model....\n",
"\n",
"Total T: 40153 Episode Num: 579 Reward: 188.060161 Avg Reward: 106.239159saving best model....\n",
"\n",
"Total T: 41153 Episode Num: 580 Reward: 555.741063 Avg Reward: 110.771692saving best model....\n",
"\n",
"Total T: 42153 Episode Num: 581 Reward: 569.504707 Avg Reward: 115.972529saving best model....\n",
"\n",
"Total T: 46021 Episode Num: 592 Reward: 198.298114 Avg Reward: 116.316214saving best model....\n",
"\n",
"Total T: 46418 Episode Num: 593 Reward: 210.076767 Avg Reward: 117.462085saving best model....\n",
"\n",
"Total T: 46844 Episode Num: 594 Reward: 246.176531 Avg Reward: 119.406595saving best model....\n",
"\n",
"Total T: 47203 Episode Num: 595 Reward: 209.325910 Avg Reward: 120.992479saving best model....\n",
"\n",
"Total T: 47523 Episode Num: 596 Reward: 185.038532 Avg Reward: 122.203928saving best model....\n",
"\n",
"Total T: 47685 Episode Num: 597 Reward: 84.376518 Avg Reward: 122.501402saving best model....\n",
"\n",
"Total T: 48022 Episode Num: 598 Reward: 197.779652 Avg Reward: 123.807743saving best model....\n",
"\n",
"Total T: 48373 Episode Num: 599 Reward: 219.670611 Avg Reward: 125.124664saving best model....\n",
"\n",
"Total T: 48818 Episode Num: 600 Reward: 180.068262 Avg Reward: 125.637730saving best model....\n",
"\n",
"Total T: 49314 Episode Num: 601 Reward: 232.243027 Avg Reward: 127.367441saving best model....\n",
"\n",
"Total T: 49668 Episode Num: 602 Reward: 196.362185 Avg Reward: 128.773097saving best model....\n",
"\n",
"Total T: 50057 Episode Num: 604 Reward: 72.392047 Avg Reward: 128.827502saving best model....\n",
"\n",
"Total T: 50557 Episode Num: 605 Reward: 163.868441 Avg Reward: 129.844544saving best model....\n",
"\n",
"Total T: 50787 Episode Num: 606 Reward: 154.744852 Avg Reward: 130.659464saving best model....\n",
"\n",
"Total T: 51078 Episode Num: 607 Reward: 174.716875 Avg Reward: 131.897640saving best model....\n",
"\n",
"Total T: 51385 Episode Num: 608 Reward: 187.696860 Avg Reward: 133.279146saving best model....\n",
"\n",
"Total T: 51704 Episode Num: 609 Reward: 174.131747 Avg Reward: 134.236059saving best model....\n",
"\n",
"Total T: 51998 Episode Num: 610 Reward: 178.286926 Avg Reward: 135.291577saving best model....\n",
"\n",
"Total T: 52282 Episode Num: 611 Reward: 175.440940 Avg Reward: 136.289370saving best model....\n",
"\n",
"Total T: 52632 Episode Num: 612 Reward: 213.872092 Avg Reward: 137.640046saving best model....\n",
"\n",
"Total T: 52945 Episode Num: 613 Reward: 179.088805 Avg Reward: 138.628769saving best model....\n",
"\n",
"Total T: 53261 Episode Num: 614 Reward: 180.717978 Avg Reward: 139.967138saving best model....\n",
"\n",
"Total T: 53673 Episode Num: 615 Reward: 221.511281 Avg Reward: 140.720320saving best model....\n",
"\n",
"Total T: 54017 Episode Num: 616 Reward: 178.833783 Avg Reward: 141.788851saving best model....\n",
"\n",
"Total T: 54342 Episode Num: 617 Reward: 168.209854 Avg Reward: 142.844983saving best model....\n",
"\n",
"Total T: 54676 Episode Num: 618 Reward: 181.625722 Avg Reward: 143.984519saving best model....\n",
"\n",
"Total T: 55164 Episode Num: 621 Reward: 38.381799 Avg Reward: 144.5121325saving best model....\n",
"\n",
"Total T: 56164 Episode Num: 622 Reward: 574.840826 Avg Reward: 149.878888saving best model....\n",
"\n",
"Total T: 58011 Episode Num: 627 Reward: 123.568904 Avg Reward: 152.010689saving best model....\n",
"\n",
"Total T: 59316 Episode Num: 631 Reward: 75.566247 Avg Reward: 151.8540930saving best model....\n",
"\n",
"Total T: 59685 Episode Num: 632 Reward: 160.998369 Avg Reward: 152.781745saving best model....\n",
"\n",
"Total T: 60631 Episode Num: 635 Reward: 92.106910 Avg Reward: 153.2261577saving best model....\n",
"\n",
"Total T: 94602 Episode Num: 787 Reward: 88.837455 Avg Reward: 153.6093017saving best model....\n",
"\n",
"Total T: 95602 Episode Num: 788 Reward: 627.306957 Avg Reward: 160.170133saving best model....\n",
"\n",
"Total T: 95680 Episode Num: 789 Reward: 81.199510 Avg Reward: 161.038349saving best model....\n",
"\n",
"Total T: 96680 Episode Num: 790 Reward: 726.985955 Avg Reward: 168.351230saving best model....\n",
"\n",
"Total T: 97027 Episode Num: 791 Reward: 214.464171 Avg Reward: 170.476207saving best model....\n",
"\n",
"Total T: 97315 Episode Num: 792 Reward: 164.787312 Avg Reward: 172.166890saving best model....\n",
"\n",
"Total T: 98180 Episode Num: 793 Reward: 499.563703 Avg Reward: 177.180258saving best model....\n",
"\n",
"Total T: 99180 Episode Num: 794 Reward: 641.903577 Avg Reward: 183.565071saving best model....\n",
"\n",
"Total T: 100180 Episode Num: 795 Reward: 669.655467 Avg Reward: 190.319001saving best model....\n",
"\n",
"Total T: 101180 Episode Num: 796 Reward: 752.240968 Avg Reward: 197.920959saving best model....\n",
"\n",
"Total T: 102180 Episode Num: 797 Reward: 761.280812 Avg Reward: 205.481511saving best model....\n",
"\n",
"Total T: 103180 Episode Num: 798 Reward: 623.072607 Avg Reward: 211.651707saving best model....\n",
"\n",
"Total T: 104180 Episode Num: 799 Reward: 782.477656 Avg Reward: 219.570758saving best model....\n",
"\n",
"Total T: 105180 Episode Num: 800 Reward: 779.456492 Avg Reward: 227.488873saving best model....\n",
"\n",
"Total T: 106180 Episode Num: 801 Reward: 781.362885 Avg Reward: 235.347343saving best model....\n",
"\n",
"Total T: 107180 Episode Num: 802 Reward: 775.136123 Avg Reward: 243.149804saving best model....\n",
"\n",
"Total T: 108180 Episode Num: 803 Reward: 774.094066 Avg Reward: 250.954661saving best model....\n",
"\n",
"Total T: 108644 Episode Num: 804 Reward: 338.825863 Avg Reward: 254.420116saving best model....\n",
"\n",
"Total T: 109644 Episode Num: 805 Reward: 613.417764 Avg Reward: 260.636433saving best model....\n",
"\n",
"Total T: 110080 Episode Num: 806 Reward: 273.777772 Avg Reward: 263.413542saving best model....\n",
"\n",
"Total T: 111080 Episode Num: 807 Reward: 657.352827 Avg Reward: 270.006090saving best model....\n",
"\n",
"Total T: 111431 Episode Num: 808 Reward: 226.134011 Avg Reward: 272.280073saving best model....\n",
"\n",
"Total T: 112431 Episode Num: 809 Reward: 701.014874 Avg Reward: 279.296688saving best model....\n",
"\n",
"Total T: 113431 Episode Num: 810 Reward: 648.278442 Avg Reward: 285.836800saving best model....\n",
"\n",
"Total T: 114431 Episode Num: 811 Reward: 671.086582 Avg Reward: 292.577366saving best model....\n",
"\n",
"Total T: 115431 Episode Num: 812 Reward: 389.517077 Avg Reward: 296.344534saving best model....\n",
"\n",
"Total T: 116431 Episode Num: 813 Reward: 589.032364 Avg Reward: 302.165522saving best model....\n",
"\n",
"Total T: 117431 Episode Num: 814 Reward: 296.571765 Avg Reward: 305.048346saving best model....\n",
"\n",
"Total T: 118033 Episode Num: 815 Reward: 358.732093 Avg Reward: 308.633917saving best model....\n",
"\n",
"Total T: 119009 Episode Num: 816 Reward: 345.556066 Avg Reward: 312.157789saving best model....\n",
"\n",
"Total T: 120009 Episode Num: 817 Reward: 428.745061 Avg Reward: 316.325602saving best model....\n",
"\n",
"Total T: 120811 Episode Num: 818 Reward: 466.206787 Avg Reward: 320.972867saving best model....\n",
"\n",
"Total T: 121811 Episode Num: 819 Reward: 645.154980 Avg Reward: 327.371958saving best model....\n",
"\n",
"Total T: 122811 Episode Num: 820 Reward: 749.890390 Avg Reward: 334.831932saving best model....\n",
"\n",
"Total T: 123811 Episode Num: 821 Reward: 513.603701 Avg Reward: 339.952398saving best model....\n",
"\n",
"Total T: 124811 Episode Num: 822 Reward: 704.736128 Avg Reward: 347.004822saving best model....\n",
"\n",
"Total T: 125811 Episode Num: 823 Reward: 596.104607 Avg Reward: 353.004368saving best model....\n",
"\n",
"Total T: 126811 Episode Num: 824 Reward: 771.726789 Avg Reward: 360.779569saving best model....\n",
"\n",
"Total T: 127811 Episode Num: 825 Reward: 774.429527 Avg Reward: 368.485279saving best model....\n",
"\n",
"Total T: 128811 Episode Num: 826 Reward: 776.687635 Avg Reward: 376.129347saving best model....\n",
"\n",
"Total T: 129811 Episode Num: 827 Reward: 786.188823 Avg Reward: 383.649707saving best model....\n",
"\n",
"Total T: 130811 Episode Num: 828 Reward: 781.769788 Avg Reward: 391.324391saving best model....\n",
"\n",
"Total T: 131811 Episode Num: 829 Reward: 778.572348 Avg Reward: 397.996367saving best model....\n",
"\n",
"Total T: 132811 Episode Num: 830 Reward: 779.322882 Avg Reward: 404.029274saving best model....\n",
"\n",
"Total T: 133811 Episode Num: 831 Reward: 686.317689 Avg Reward: 410.692927saving best model....\n",
"\n",
"Total T: 134811 Episode Num: 832 Reward: 624.585270 Avg Reward: 416.750915saving best model....\n",
"\n",
"Total T: 135811 Episode Num: 833 Reward: 666.012935 Avg Reward: 423.179106saving best model....\n",
"\n",
"Total T: 136811 Episode Num: 834 Reward: 628.673884 Avg Reward: 429.218067saving best model....\n",
"\n",
"Total T: 137656 Episode Num: 835 Reward: 508.240021 Avg Reward: 434.041061saving best model....\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total T: 138656 Episode Num: 836 Reward: 607.065039 Avg Reward: 439.941842saving best model....\n",
"\n",
"Total T: 139656 Episode Num: 837 Reward: 595.689970 Avg Reward: 445.555158saving best model....\n",
"\n",
"Total T: 140656 Episode Num: 838 Reward: 732.384281 Avg Reward: 452.465407saving best model....\n",
"\n",
"Total T: 141531 Episode Num: 839 Reward: 647.656644 Avg Reward: 459.453589saving best model....\n",
"\n",
"Total T: 142531 Episode Num: 840 Reward: 756.008799 Avg Reward: 465.000707saving best model....\n",
"\n",
"Total T: 143531 Episode Num: 841 Reward: 695.013929 Avg Reward: 471.457913saving best model....\n",
"\n",
"Total T: 144531 Episode Num: 842 Reward: 768.923176 Avg Reward: 478.003507saving best model....\n",
"\n",
"Total T: 145531 Episode Num: 843 Reward: 742.911920 Avg Reward: 483.338175saving best model....\n",
"\n",
"Total T: 146531 Episode Num: 844 Reward: 762.849011 Avg Reward: 488.611679saving best model....\n",
"\n",
"Total T: 147531 Episode Num: 845 Reward: 759.828828 Avg Reward: 492.855337saving best model....\n",
"\n",
"Total T: 148531 Episode Num: 846 Reward: 760.423366 Avg Reward: 498.781393saving best model....\n",
"\n",
"Total T: 149531 Episode Num: 847 Reward: 693.903102 Avg Reward: 503.894419saving best model....\n",
"\n",
"Total T: 150531 Episode Num: 848 Reward: 690.529362 Avg Reward: 509.918776saving best model....\n",
"\n",
"Total T: 151531 Episode Num: 849 Reward: 768.121156 Avg Reward: 516.182598saving best model....\n",
"\n",
"Total T: 152531 Episode Num: 850 Reward: 768.642842 Avg Reward: 516.759832saving best model....\n",
"\n",
"Total T: 154531 Episode Num: 852 Reward: 697.973138 Avg Reward: 521.283181saving best model....\n",
"\n",
"Total T: 155531 Episode Num: 853 Reward: 665.384594 Avg Reward: 523.956814saving best model....\n",
"\n",
"Total T: 156531 Episode Num: 854 Reward: 762.398970 Avg Reward: 527.921202saving best model....\n",
"\n",
"Total T: 169213 Episode Num: 871 Reward: 346.901259 Avg Reward: 527.493650saving best model....\n",
"\n",
"Total T: 171176 Episode Num: 874 Reward: 69.619570 Avg Reward: 531.3691755saving best model....\n",
"\n",
"Total T: 172176 Episode Num: 875 Reward: 736.132934 Avg Reward: 537.978233saving best model....\n",
"\n",
"Total T: 173176 Episode Num: 876 Reward: 741.795397 Avg Reward: 544.165684saving best model....\n",
"\n",
"Total T: 174058 Episode Num: 878 Reward: 180.599964 Avg Reward: 542.819475saving best model....\n",
"\n",
"Total T: 175058 Episode Num: 879 Reward: 755.526016 Avg Reward: 549.465812saving best model....\n",
"\n",
"Total T: 176058 Episode Num: 880 Reward: 768.825127 Avg Reward: 555.038449saving best model....\n",
"\n",
"Total T: 177058 Episode Num: 881 Reward: 783.613216 Avg Reward: 561.374477saving best model....\n",
"\n",
"Total T: 178058 Episode Num: 882 Reward: 628.747135 Avg Reward: 566.602984saving best model....\n",
"\n",
"Total T: 179058 Episode Num: 883 Reward: 790.460362 Avg Reward: 573.112686saving best model....\n",
"\n",
"Total T: 180058 Episode Num: 884 Reward: 752.781964 Avg Reward: 573.258240saving best model....\n",
"\n",
"Total T: 181058 Episode Num: 885 Reward: 579.352368 Avg Reward: 578.358996saving best model....\n",
"\n",
"Total T: 182058 Episode Num: 886 Reward: 772.846145 Avg Reward: 585.130737saving best model....\n",
"\n",
"Total T: 183058 Episode Num: 887 Reward: 791.617293 Avg Reward: 592.158536saving best model....\n",
"\n",
"Total T: 184058 Episode Num: 888 Reward: 771.091110 Avg Reward: 593.596377saving best model....\n",
"\n",
"Total T: 186058 Episode Num: 890 Reward: 634.438210 Avg Reward: 599.561875saving best model....\n",
"\n",
"Total T: 187058 Episode Num: 891 Reward: 592.944176 Avg Reward: 603.346675saving best model....\n",
"\n",
"Total T: 188058 Episode Num: 892 Reward: 753.950534 Avg Reward: 609.238307saving best model....\n",
"\n",
"Total T: 189058 Episode Num: 893 Reward: 778.486372 Avg Reward: 612.027534saving best model....\n",
"\n",
"Total T: 190058 Episode Num: 894 Reward: 729.012056 Avg Reward: 612.898618saving best model....\n",
"\n",
"Total T: 200832 Episode Num: 905 Reward: 598.843994 Avg Reward: 612.364402saving best model....\n",
"\n",
"Total T: 207218 Episode Num: 913 Reward: 567.725593 Avg Reward: 611.981382saving best model....\n",
"\n",
"Total T: 208420 Episode Num: 915 Reward: 142.749539 Avg Reward: 614.458512saving best model....\n",
"\n",
"Total T: 209420 Episode Num: 916 Reward: 600.423960 Avg Reward: 617.007191saving best model....\n",
"\n",
"Total T: 210420 Episode Num: 917 Reward: 584.272372 Avg Reward: 618.562464saving best model....\n",
"\n",
"Total T: 211420 Episode Num: 918 Reward: 631.155074 Avg Reward: 620.211947saving best model....\n",
"\n",
"Total T: 212420 Episode Num: 919 Reward: 700.045365 Avg Reward: 620.760851saving best model....\n",
"\n",
"Total T: 213420 Episode Num: 920 Reward: 751.913776 Avg Reward: 620.781085saving best model....\n",
"\n",
"Total T: 215420 Episode Num: 922 Reward: 638.740870 Avg Reward: 621.963664saving best model....\n",
"\n",
"Total T: 395578 Episode Num: 1136 Reward: 649.601751 Avg Reward: 621.347656saving best model....\n",
"\n",
"Total T: 396578 Episode Num: 1137 Reward: 708.990441 Avg Reward: 627.158660saving best model....\n",
"\n",
"Total T: 397578 Episode Num: 1138 Reward: 644.682781 Avg Reward: 630.052805saving best model....\n",
"\n",
"Total T: 398578 Episode Num: 1139 Reward: 786.658749 Avg Reward: 632.597688saving best model....\n",
"\n",
"Total T: 399578 Episode Num: 1140 Reward: 867.085626 Avg Reward: 635.357976saving best model....\n",
"\n",
"Total T: 400578 Episode Num: 1141 Reward: 779.503252 Avg Reward: 641.919438saving best model....\n",
"\n",
"Total T: 401578 Episode Num: 1142 Reward: 756.239944 Avg Reward: 644.276590saving best model....\n",
"\n",
"Total T: 402578 Episode Num: 1143 Reward: 734.565986 Avg Reward: 646.428746saving best model....\n",
"\n",
"Total T: 403578 Episode Num: 1144 Reward: 777.622045 Avg Reward: 651.599341saving best model....\n",
"\n",
"Total T: 406578 Episode Num: 1147 Reward: 795.899634 Avg Reward: 654.058628saving best model....\n",
"\n",
"Total T: 407578 Episode Num: 1148 Reward: 864.715466 Avg Reward: 654.760812saving best model....\n",
"\n",
"Total T: 408578 Episode Num: 1149 Reward: 764.133676 Avg Reward: 656.128315saving best model....\n",
"\n",
"Total T: 409578 Episode Num: 1150 Reward: 722.064514 Avg Reward: 657.523010saving best model....\n",
"\n",
"Total T: 410578 Episode Num: 1151 Reward: 822.624245 Avg Reward: 659.905185saving best model....\n",
"\n",
"Total T: 411578 Episode Num: 1152 Reward: 728.414011 Avg Reward: 660.737572saving best model....\n",
"\n",
"Total T: 412578 Episode Num: 1153 Reward: 835.283055 Avg Reward: 662.395637saving best model....\n",
"\n",
"Total T: 413578 Episode Num: 1154 Reward: 817.009424 Avg Reward: 664.247630saving best model....\n",
"\n",
"Total T: 414578 Episode Num: 1155 Reward: 811.311305 Avg Reward: 665.597196saving best model....\n",
"\n",
"Total T: 415578 Episode Num: 1156 Reward: 786.708018 Avg Reward: 666.069587saving best model....\n",
"\n",
"Total T: 416578 Episode Num: 1157 Reward: 788.983852 Avg Reward: 666.645146saving best model....\n",
"\n",
"Total T: 417578 Episode Num: 1158 Reward: 661.522098 Avg Reward: 667.019939saving best model....\n",
"\n",
"Total T: 418578 Episode Num: 1159 Reward: 729.820605 Avg Reward: 668.409907saving best model....\n",
"\n",
"Total T: 430578 Episode Num: 1171 Reward: 971.458339 Avg Reward: 668.622259saving best model....\n",
"\n",
"Total T: 431578 Episode Num: 1172 Reward: 911.477856 Avg Reward: 670.595334saving best model....\n",
"\n",
"Total T: 432578 Episode Num: 1173 Reward: 966.078742 Avg Reward: 673.229036saving best model....\n",
"\n",
"Total T: 433578 Episode Num: 1174 Reward: 940.914691 Avg Reward: 674.836664saving best model....\n",
"\n",
"Total T: 434578 Episode Num: 1175 Reward: 820.999144 Avg Reward: 675.333570saving best model....\n",
"\n",
"Total T: 436578 Episode Num: 1177 Reward: 775.876824 Avg Reward: 676.101941saving best model....\n",
"\n",
"Total T: 437578 Episode Num: 1178 Reward: 852.367392 Avg Reward: 678.068884saving best model....\n",
"\n",
"Total T: 438578 Episode Num: 1179 Reward: 797.859466 Avg Reward: 684.013201saving best model....\n",
"\n",
"Total T: 439578 Episode Num: 1180 Reward: 792.535825 Avg Reward: 690.947694saving best model....\n",
"\n",
"Total T: 440578 Episode Num: 1181 Reward: 769.837102 Avg Reward: 697.649949saving best model....\n",
"\n",
"Total T: 441578 Episode Num: 1182 Reward: 766.926211 Avg Reward: 699.189786saving best model....\n",
"\n",
"Total T: 442578 Episode Num: 1183 Reward: 793.089289 Avg Reward: 705.492816saving best model....\n",
"\n",
"Total T: 443578 Episode Num: 1184 Reward: 827.894763 Avg Reward: 705.544051saving best model....\n",
"\n",
"Total T: 445578 Episode Num: 1186 Reward: 683.352853 Avg Reward: 706.873818saving best model....\n",
"\n",
"Total T: 446578 Episode Num: 1187 Reward: 1018.927876 Avg Reward: 710.924138saving best model....\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total T: 447578 Episode Num: 1188 Reward: 792.753844 Avg Reward: 711.915058saving best model....\n",
"\n",
"Total T: 450578 Episode Num: 1191 Reward: 693.729834 Avg Reward: 712.214693saving best model....\n",
"\n",
"Total T: 451578 Episode Num: 1192 Reward: 815.165723 Avg Reward: 714.397943saving best model....\n",
"\n",
"Total T: 452578 Episode Num: 1193 Reward: 855.351908 Avg Reward: 719.103338saving best model....\n",
"\n",
"Total T: 479242 Episode Num: 1225 Reward: 655.502862 Avg Reward: 715.526317saving best model....\n",
"\n",
"Total T: 480242 Episode Num: 1226 Reward: 792.296167 Avg Reward: 722.797637saving best model....\n",
"\n",
"Total T: 483242 Episode Num: 1229 Reward: 796.779098 Avg Reward: 724.130064saving best model....\n",
"\n",
"Total T: 484242 Episode Num: 1230 Reward: 848.743813 Avg Reward: 724.889503saving best model....\n",
"\n",
"Total T: 485242 Episode Num: 1231 Reward: 922.743257 Avg Reward: 727.974447saving best model....\n",
"\n",
"Total T: 488242 Episode Num: 1234 Reward: 826.778749 Avg Reward: 730.109409saving best model....\n",
"\n",
"Total T: 489242 Episode Num: 1235 Reward: 701.188947 Avg Reward: 730.239606saving best model....\n",
"\n",
"Total T: 490242 Episode Num: 1236 Reward: 816.181276 Avg Reward: 731.905401saving best model....\n",
"\n",
"Total T: 491242 Episode Num: 1237 Reward: 849.544791 Avg Reward: 733.310945saving best model....\n",
"\n",
"Total T: 492242 Episode Num: 1238 Reward: 870.492120 Avg Reward: 735.569038saving best model....\n",
"\n",
"Total T: 493242 Episode Num: 1239 Reward: 867.974516 Avg Reward: 736.382196saving best model....\n",
"\n",
"Total T: 512242 Episode Num: 1258 Reward: 752.203607 Avg Reward: 736.945031saving best model....\n",
"\n",
"Total T: 513242 Episode Num: 1259 Reward: 815.165514 Avg Reward: 737.798480saving best model....\n",
"\n",
"Total T: 514242 Episode Num: 1260 Reward: 852.299877 Avg Reward: 738.815068saving best model....\n",
"\n",
"Total T: 515242 Episode Num: 1261 Reward: 819.679222 Avg Reward: 739.418269saving best model....\n",
"\n",
"Total T: 518242 Episode Num: 1264 Reward: 801.780432 Avg Reward: 740.006429saving best model....\n",
"\n",
"Total T: 519242 Episode Num: 1265 Reward: 926.175743 Avg Reward: 741.087535saving best model....\n",
"\n",
"Total T: 520242 Episode Num: 1266 Reward: 837.876381 Avg Reward: 742.443381saving best model....\n",
"\n",
"Total T: 521242 Episode Num: 1267 Reward: 827.456550 Avg Reward: 743.112672saving best model....\n",
"\n",
"Total T: 522242 Episode Num: 1268 Reward: 963.646703 Avg Reward: 743.741782saving best model....\n",
"\n",
"Total T: 543242 Episode Num: 1289 Reward: 922.867578 Avg Reward: 741.6915411saving best model....\n",
"\n",
"Total T: 544242 Episode Num: 1290 Reward: 1057.938789 Avg Reward: 746.300036saving best model....\n",
"\n",
"Total T: 545242 Episode Num: 1291 Reward: 1037.058065 Avg Reward: 749.733319saving best model....\n",
"\n",
"Total T: 546242 Episode Num: 1292 Reward: 1041.488839 Avg Reward: 751.996550saving best model....\n",
"\n",
"Total T: 547242 Episode Num: 1293 Reward: 1074.842421 Avg Reward: 754.191455saving best model....\n",
"\n",
"Total T: 548242 Episode Num: 1294 Reward: 932.343952 Avg Reward: 754.299039saving best model....\n",
"\n",
"Total T: 550005 Episode Num: 1296 Reward: 921.352108 Avg Reward: 760.605109saving best model....\n",
"\n",
"Total T: 551005 Episode Num: 1297 Reward: 952.664393 Avg Reward: 764.054141saving best model....\n",
"\n",
"Total T: 552005 Episode Num: 1298 Reward: 937.362327 Avg Reward: 765.729038saving best model....\n",
"\n",
"Total T: 553005 Episode Num: 1299 Reward: 1090.911131 Avg Reward: 772.431448saving best model....\n",
"\n",
"Total T: 554005 Episode Num: 1300 Reward: 1135.958969 Avg Reward: 781.455767saving best model....\n",
"\n",
"Total T: 555005 Episode Num: 1301 Reward: 1121.910155 Avg Reward: 792.578165saving best model....\n",
"\n",
"Total T: 556005 Episode Num: 1302 Reward: 1108.084628 Avg Reward: 800.977057saving best model....\n",
"\n",
"Total T: 557005 Episode Num: 1303 Reward: 1082.758387 Avg Reward: 811.729111saving best model....\n",
"\n",
"Total T: 558005 Episode Num: 1304 Reward: 1152.565287 Avg Reward: 823.142004saving best model....\n",
"\n",
"Total T: 559005 Episode Num: 1305 Reward: 1127.445844 Avg Reward: 830.929281saving best model....\n",
"\n",
"Total T: 560005 Episode Num: 1306 Reward: 1119.569741 Avg Reward: 840.932692saving best model....\n",
"\n",
"Total T: 561005 Episode Num: 1307 Reward: 1029.479529 Avg Reward: 850.394571saving best model....\n",
"\n",
"Total T: 562005 Episode Num: 1308 Reward: 1262.851958 Avg Reward: 855.328429saving best model....\n",
"\n",
"Total T: 563005 Episode Num: 1309 Reward: 1140.687564 Avg Reward: 859.220277saving best model....\n",
"\n",
"Total T: 564005 Episode Num: 1310 Reward: 1021.642922 Avg Reward: 863.302316saving best model....\n",
"\n",
"Total T: 565005 Episode Num: 1311 Reward: 993.327258 Avg Reward: 869.309899saving best model....\n",
"\n",
"Total T: 586005 Episode Num: 1332 Reward: 1034.832447 Avg Reward: 869.549966saving best model....\n",
"\n",
"Total T: 587005 Episode Num: 1333 Reward: 902.167605 Avg Reward: 870.772481saving best model....\n",
"\n",
"Total T: 588005 Episode Num: 1334 Reward: 931.154713 Avg Reward: 871.816241saving best model....\n",
"\n",
"Total T: 589005 Episode Num: 1335 Reward: 1137.122761 Avg Reward: 876.175579saving best model....\n",
"\n",
"Total T: 590005 Episode Num: 1336 Reward: 1036.170513 Avg Reward: 878.375471saving best model....\n",
"\n",
"Total T: 591005 Episode Num: 1337 Reward: 1164.927283 Avg Reward: 881.529296saving best model....\n",
"\n",
"Total T: 592005 Episode Num: 1338 Reward: 1121.567905 Avg Reward: 884.040054saving best model....\n",
"\n",
"Total T: 593005 Episode Num: 1339 Reward: 998.596896 Avg Reward: 885.346278saving best model....\n",
"\n",
"Total T: 594005 Episode Num: 1340 Reward: 1076.602315 Avg Reward: 886.457646saving best model....\n",
"\n",
"Total T: 595005 Episode Num: 1341 Reward: 1077.224323 Avg Reward: 889.750229saving best model....\n",
"\n",
"Total T: 596005 Episode Num: 1342 Reward: 1169.986189 Avg Reward: 895.944625saving best model....\n",
"\n",
"Total T: 597005 Episode Num: 1343 Reward: 846.093629 Avg Reward: 898.442255saving best model....\n",
"\n",
"Total T: 598005 Episode Num: 1344 Reward: 912.140225 Avg Reward: 900.870517saving best model....\n",
"\n",
"Total T: 599005 Episode Num: 1345 Reward: 998.977418 Avg Reward: 903.435651saving best model....\n",
"\n",
"Total T: 600005 Episode Num: 1346 Reward: 957.810337 Avg Reward: 906.599519saving best model....\n",
"\n",
"Total T: 601005 Episode Num: 1347 Reward: 1045.452156 Avg Reward: 910.834408saving best model....\n",
"\n",
"Total T: 602005 Episode Num: 1348 Reward: 1006.705847 Avg Reward: 912.641678saving best model....\n",
"\n",
"Total T: 603005 Episode Num: 1349 Reward: 1088.235410 Avg Reward: 914.639132saving best model....\n",
"\n",
"Total T: 604005 Episode Num: 1350 Reward: 1133.837508 Avg Reward: 916.304814saving best model....\n",
"\n",
"Total T: 605005 Episode Num: 1351 Reward: 1101.319154 Avg Reward: 918.642305saving best model....\n",
"\n",
"Total T: 606005 Episode Num: 1352 Reward: 1164.174945 Avg Reward: 921.339311saving best model....\n",
"\n",
"Total T: 607005 Episode Num: 1353 Reward: 1236.755023 Avg Reward: 925.942720saving best model....\n",
"\n",
"Total T: 608005 Episode Num: 1354 Reward: 1164.652817 Avg Reward: 929.599665saving best model....\n",
"\n",
"Total T: 609005 Episode Num: 1355 Reward: 1165.684755 Avg Reward: 933.718925saving best model....\n",
"\n",
"Total T: 610005 Episode Num: 1356 Reward: 1054.454487 Avg Reward: 936.074275saving best model....\n",
"\n",
"Total T: 611005 Episode Num: 1357 Reward: 1115.596398 Avg Reward: 938.943349saving best model....\n",
"\n",
"Total T: 612005 Episode Num: 1358 Reward: 1090.608486 Avg Reward: 942.327398saving best model....\n",
"\n",
"Total T: 613005 Episode Num: 1359 Reward: 1070.331658 Avg Reward: 944.879060saving best model....\n",
"\n",
"Total T: 614005 Episode Num: 1360 Reward: 1142.584467 Avg Reward: 947.781905saving best model....\n",
"\n",
"Total T: 615005 Episode Num: 1361 Reward: 1187.418647 Avg Reward: 951.459300saving best model....\n",
"\n",
"Total T: 616005 Episode Num: 1362 Reward: 918.100151 Avg Reward: 953.086016saving best model....\n",
"\n",
"Total T: 617005 Episode Num: 1363 Reward: 1222.887686 Avg Reward: 958.311620saving best model....\n",
"\n",
"Total T: 618005 Episode Num: 1364 Reward: 1011.881800 Avg Reward: 960.412634saving best model....\n",
"\n",
"Total T: 619005 Episode Num: 1365 Reward: 1050.040623 Avg Reward: 961.651283saving best model....\n",
"\n",
"Total T: 620005 Episode Num: 1366 Reward: 989.660661 Avg Reward: 963.169126saving best model....\n",
"\n",
"Total T: 621005 Episode Num: 1367 Reward: 1131.981404 Avg Reward: 966.214374saving best model....\n",
"\n",
"Total T: 622005 Episode Num: 1368 Reward: 1058.738071 Avg Reward: 967.165288saving best model....\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total T: 623005 Episode Num: 1369 Reward: 1187.826921 Avg Reward: 969.779982saving best model....\n",
"\n",
"Total T: 624005 Episode Num: 1370 Reward: 1163.409350 Avg Reward: 973.381144saving best model....\n",
"\n",
"Total T: 625005 Episode Num: 1371 Reward: 918.428779 Avg Reward: 975.207919saving best model....\n",
"\n",
"Total T: 626005 Episode Num: 1372 Reward: 1244.181364 Avg Reward: 980.535153saving best model....\n",
"\n",
"Total T: 627005 Episode Num: 1373 Reward: 1105.513651 Avg Reward: 985.010907saving best model....\n",
"\n",
"Total T: 628005 Episode Num: 1374 Reward: 1131.012411 Avg Reward: 988.240308saving best model....\n",
"\n",
"Total T: 728909 Episode Num: 1484 Reward: 1428.454555 Avg Reward: 981.490534saving best model....\n",
"\n",
"Total T: 729909 Episode Num: 1485 Reward: 1434.856087 Avg Reward: 993.059802saving best model....\n",
"\n",
"Total T: 730909 Episode Num: 1486 Reward: 1137.569908 Avg Reward: 999.046120saving best model....\n",
"\n",
"Total T: 731909 Episode Num: 1487 Reward: 1443.342884 Avg Reward: 1009.637547saving best model....\n",
"\n",
"Total T: 732909 Episode Num: 1488 Reward: 954.222501 Avg Reward: 1014.775019saving best model....\n",
"\n",
"Total T: 733909 Episode Num: 1489 Reward: 1313.226783 Avg Reward: 1025.283603saving best model....\n",
"\n",
"Total T: 734909 Episode Num: 1490 Reward: 1519.503919 Avg Reward: 1040.227172saving best model....\n",
"\n",
"Total T: 736114 Episode Num: 1492 Reward: 254.569992 Avg Reward: 1049.2804163saving best model....\n",
"\n",
"Total T: 737114 Episode Num: 1493 Reward: 1504.772089 Avg Reward: 1060.317251saving best model....\n",
"\n",
"Total T: 738114 Episode Num: 1494 Reward: 1394.658359 Avg Reward: 1067.194087saving best model....\n",
"\n",
"Total T: 739114 Episode Num: 1495 Reward: 1463.194232 Avg Reward: 1074.199219saving best model....\n",
"\n",
"Total T: 740114 Episode Num: 1496 Reward: 1400.722019 Avg Reward: 1080.502140saving best model....\n",
"\n",
"Total T: 740954 Episode Num: 1497 Reward: 1190.432086 Avg Reward: 1084.683157saving best model....\n",
"\n",
"Total T: 743257 Episode Num: 1501 Reward: 1200.082435 Avg Reward: 1089.859694saving best model....\n",
"\n",
"Total T: 744257 Episode Num: 1502 Reward: 1458.376261 Avg Reward: 1100.004892saving best model....\n",
"\n",
"Total T: 745257 Episode Num: 1503 Reward: 1439.925858 Avg Reward: 1106.625293saving best model....\n",
"\n",
"Total T: 746257 Episode Num: 1504 Reward: 1556.926879 Avg Reward: 1115.691109saving best model....\n",
"\n",
"Total T: 747257 Episode Num: 1505 Reward: 1439.282788 Avg Reward: 1122.832899saving best model....\n",
"\n",
"Total T: 748257 Episode Num: 1506 Reward: 1369.395746 Avg Reward: 1129.501767saving best model....\n",
"\n",
"Total T: 749257 Episode Num: 1507 Reward: 1254.555832 Avg Reward: 1135.847085saving best model....\n",
"\n",
"Total T: 750257 Episode Num: 1508 Reward: 1373.900762 Avg Reward: 1139.422490saving best model....\n",
"\n",
"Total T: 751257 Episode Num: 1509 Reward: 1560.255871 Avg Reward: 1145.043701saving best model....\n",
"\n",
"Total T: 752257 Episode Num: 1510 Reward: 1385.998813 Avg Reward: 1149.252273saving best model....\n",
"\n",
"Total T: 767711 Episode Num: 1529 Reward: 1433.754504 Avg Reward: 1148.444785saving best model....\n",
"\n",
"Total T: 768711 Episode Num: 1530 Reward: 1528.218003 Avg Reward: 1157.587046saving best model....\n",
"\n",
"Total T: 776269 Episode Num: 1539 Reward: 1375.600956 Avg Reward: 1161.774089saving best model....\n",
"\n",
"Total T: 777269 Episode Num: 1540 Reward: 1567.416917 Avg Reward: 1164.238443saving best model....\n",
"\n",
"Total T: 778269 Episode Num: 1541 Reward: 1336.661864 Avg Reward: 1167.435113saving best model....\n",
"\n",
"Total T: 778464 Episode Num: 1542 Reward: 211.609520 Avg Reward: 1168.458280saving best model....\n",
"\n",
"Total T: 779464 Episode Num: 1543 Reward: 1462.497009 Avg Reward: 1171.463078saving best model....\n",
"\n",
"Total T: 780464 Episode Num: 1544 Reward: 1463.339541 Avg Reward: 1173.436844saving best model....\n",
"\n",
"Total T: 781464 Episode Num: 1545 Reward: 1401.733391 Avg Reward: 1176.629315saving best model....\n",
"\n",
"Total T: 782464 Episode Num: 1546 Reward: 1476.251947 Avg Reward: 1178.635507saving best model....\n",
"\n",
"Total T: 786505 Episode Num: 1551 Reward: 1464.648327 Avg Reward: 1177.506992saving best model....\n",
"\n",
"Total T: 787505 Episode Num: 1552 Reward: 1532.297859 Avg Reward: 1181.621506saving best model....\n",
"\n",
"Total T: 788505 Episode Num: 1553 Reward: 1538.141153 Avg Reward: 1185.963275saving best model....\n",
"\n",
"Total T: 802213 Episode Num: 1571 Reward: 1580.411465 Avg Reward: 1185.835794saving best model....\n",
"\n",
"Total T: 807828 Episode Num: 1578 Reward: 1593.818597 Avg Reward: 1187.904938saving best model....\n",
"\n",
"Total T: 815024 Episode Num: 1587 Reward: 1567.160422 Avg Reward: 1193.069525saving best model....\n",
"\n",
"Total T: 816024 Episode Num: 1588 Reward: 1529.357766 Avg Reward: 1198.820877saving best model....\n",
"\n",
"Total T: 817024 Episode Num: 1589 Reward: 1649.595876 Avg Reward: 1202.184568saving best model....\n",
"\n",
"Total T: 818024 Episode Num: 1590 Reward: 1709.176049 Avg Reward: 1204.081290saving best model....\n",
"\n",
"Total T: 819024 Episode Num: 1591 Reward: 1634.154944 Avg Reward: 1207.236369saving best model....\n",
"\n",
"Total T: 821024 Episode Num: 1593 Reward: 1452.935746 Avg Reward: 1218.526549saving best model....\n",
"\n",
"Total T: 822024 Episode Num: 1594 Reward: 1628.610742 Avg Reward: 1220.866073saving best model....\n",
"\n",
"Total T: 823024 Episode Num: 1595 Reward: 1566.626819 Avg Reward: 1221.900399saving best model....\n",
"\n",
"Total T: 824024 Episode Num: 1596 Reward: 1626.049313 Avg Reward: 1224.153672saving best model....\n",
"\n",
"Total T: 825334 Episode Num: 1599 Reward: 69.270429 Avg Reward: 1215.29225912saving best model....\n",
"\n",
"Total T: 827078 Episode Num: 1601 Reward: 1099.612406 Avg Reward: 1228.749391saving best model....\n",
"\n",
"Total T: 828078 Episode Num: 1602 Reward: 1607.307436 Avg Reward: 1230.238703saving best model....\n",
"\n",
"Total T: 829078 Episode Num: 1603 Reward: 1683.023873 Avg Reward: 1232.669683saving best model....\n",
"\n",
"Total T: 830078 Episode Num: 1604 Reward: 1598.905086 Avg Reward: 1233.089465saving best model....\n",
"\n",
"Total T: 831078 Episode Num: 1605 Reward: 1740.011000 Avg Reward: 1236.096747saving best model....\n",
"\n",
"Total T: 832078 Episode Num: 1606 Reward: 1659.521818 Avg Reward: 1238.998008saving best model....\n",
"\n",
"Total T: 833078 Episode Num: 1607 Reward: 1617.895272 Avg Reward: 1242.631403saving best model....\n",
"\n",
"Total T: 834078 Episode Num: 1608 Reward: 1885.542293 Avg Reward: 1247.747818saving best model....\n",
"\n",
"Total T: 836172 Episode Num: 1611 Reward: 1746.152163 Avg Reward: 1242.919017saving best model....\n",
"\n",
"Total T: 837172 Episode Num: 1612 Reward: 1843.710465 Avg Reward: 1260.239522saving best model....\n",
"\n",
"Total T: 838172 Episode Num: 1613 Reward: 1662.295215 Avg Reward: 1271.448373saving best model....\n",
"\n",
"Total T: 839172 Episode Num: 1614 Reward: 1715.570324 Avg Reward: 1287.682170saving best model....\n",
"\n",
"Total T: 840172 Episode Num: 1615 Reward: 1730.398079 Avg Reward: 1291.073057saving best model....\n",
"\n",
"Total T: 843362 Episode Num: 1619 Reward: 1710.500317 Avg Reward: 1297.436787saving best model....\n",
"\n",
"Total T: 844362 Episode Num: 1620 Reward: 1929.621518 Avg Reward: 1302.610391saving best model....\n",
"\n",
"Total T: 846703 Episode Num: 1624 Reward: 1874.738517 Avg Reward: 1301.969080saving best model....\n",
"\n",
"Total T: 847703 Episode Num: 1625 Reward: 1612.327840 Avg Reward: 1307.380700saving best model....\n",
"\n",
"Total T: 943968 Episode Num: 1785 Reward: 1827.621067 Avg Reward: 1300.053038saving best model....\n",
"\n",
"Total T: 944968 Episode Num: 1786 Reward: 1925.877309 Avg Reward: 1319.447488saving best model....\n",
"\n",
"Total T: 945968 Episode Num: 1787 Reward: 1815.236786 Avg Reward: 1337.795314saving best model....\n",
"\n",
"Total T: 946968 Episode Num: 1788 Reward: 1880.512984 Avg Reward: 1356.805697saving best model....\n",
"\n",
"Total T: 947968 Episode Num: 1789 Reward: 1536.977529 Avg Reward: 1375.307590saving best model....\n",
"\n",
"Total T: 948968 Episode Num: 1790 Reward: 1931.554997 Avg Reward: 1394.704101saving best model....\n",
"\n",
"Total T: 949968 Episode Num: 1791 Reward: 1788.989560 Avg Reward: 1412.678762saving best model....\n",
"\n",
"Total T: 950968 Episode Num: 1792 Reward: 1867.520271 Avg Reward: 1428.788225saving best model....\n",
"\n",
"Total T: 951968 Episode Num: 1793 Reward: 1892.033568 Avg Reward: 1447.770016saving best model....\n",
"\n",
"Total T: 952968 Episode Num: 1794 Reward: 1746.062162 Avg Reward: 1460.364522saving best model....\n",
"\n",
"Total T: 953968 Episode Num: 1795 Reward: 1936.241493 Avg Reward: 1473.498867saving best model....\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total T: 955102 Episode Num: 1797 Reward: 133.835966 Avg Reward: 1482.105524saving best model....\n",
"\n",
"Total T: 956102 Episode Num: 1798 Reward: 2013.254597 Avg Reward: 1496.234026saving best model....\n",
"\n",
"Total T: 957102 Episode Num: 1799 Reward: 1892.994992 Avg Reward: 1511.288989saving best model....\n",
"\n",
"Total T: 958102 Episode Num: 1800 Reward: 1969.281886 Avg Reward: 1523.843636saving best model....\n",
"\n",
"Total T: 959102 Episode Num: 1801 Reward: 1950.789872 Avg Reward: 1536.192049saving best model....\n",
"\n",
"Total T: 960102 Episode Num: 1802 Reward: 1842.398237 Avg Reward: 1552.870527saving best model....\n",
"\n",
"Total T: 961102 Episode Num: 1803 Reward: 2010.537885 Avg Reward: 1564.873240saving best model....\n",
"\n",
"Total T: 962102 Episode Num: 1804 Reward: 2031.673085 Avg Reward: 1577.184046saving best model....\n",
"\n",
"Total T: 963102 Episode Num: 1805 Reward: 1479.962049 Avg Reward: 1584.201529saving best model....\n",
"\n",
"Total T: 964102 Episode Num: 1806 Reward: 1903.268789 Avg Reward: 1591.911864saving best model....\n",
"\n",
"Total T: 967929 Episode Num: 1812 Reward: 1783.964173 Avg Reward: 1598.642516saving best model....\n",
"\n",
"Total T: 968929 Episode Num: 1813 Reward: 2036.052431 Avg Reward: 1607.942555saving best model....\n",
"\n",
"Total T: 969929 Episode Num: 1814 Reward: 1525.355240 Avg Reward: 1613.799337saving best model....\n",
"\n",
"Total T: 970929 Episode Num: 1815 Reward: 1645.239215 Avg Reward: 1615.935759saving best model....\n",
"\n",
"Total T: 971929 Episode Num: 1816 Reward: 2015.619890 Avg Reward: 1619.912874saving best model....\n",
"\n",
"Total T: 972929 Episode Num: 1817 Reward: 1873.721952 Avg Reward: 1623.680393saving best model....\n",
"\n",
"Total T: 975301 Episode Num: 1820 Reward: 1927.221463 Avg Reward: 1618.036414saving best model....\n",
"\n",
"Total T: 976301 Episode Num: 1821 Reward: 1856.994684 Avg Reward: 1626.626127saving best model....\n",
"\n",
"Total T: 977301 Episode Num: 1822 Reward: 1872.479950 Avg Reward: 1642.328959saving best model....\n",
"\n",
"Total T: 978301 Episode Num: 1823 Reward: 1985.637439 Avg Reward: 1660.829056saving best model....\n",
"\n",
"Total T: 979301 Episode Num: 1824 Reward: 1882.712502 Avg Reward: 1664.755714saving best model....\n",
"\n",
"Total T: 980071 Episode Num: 1825 Reward: 1528.557868 Avg Reward: 1675.992482saving best model....\n",
"\n",
"Total T: 981071 Episode Num: 1826 Reward: 1950.513837 Avg Reward: 1684.273888saving best model....\n",
"\n",
"Total T: 982071 Episode Num: 1827 Reward: 1928.851329 Avg Reward: 1686.083388saving best model....\n",
"\n",
"Total T: 983071 Episode Num: 1828 Reward: 1884.049862 Avg Reward: 1687.431929saving best model....\n",
"\n",
"Total T: 984071 Episode Num: 1829 Reward: 1917.209568 Avg Reward: 1690.729429saving best model....\n",
"\n",
"Total T: 985071 Episode Num: 1830 Reward: 1908.401949 Avg Reward: 1694.573996saving best model....\n",
"\n",
"Total T: 986071 Episode Num: 1831 Reward: 2018.054154 Avg Reward: 1700.164403saving best model....\n",
"\n",
"Total T: 988071 Episode Num: 1833 Reward: 1716.781755 Avg Reward: 1700.977108saving best model....\n",
"\n",
"Total T: 989071 Episode Num: 1834 Reward: 1735.771069 Avg Reward: 1703.870580saving best model....\n",
"\n",
"Total T: 990071 Episode Num: 1835 Reward: 1937.821126 Avg Reward: 1706.492930saving best model....\n",
"\n",
"Total T: 1008126 Episode Num: 1856 Reward: 162.335238 Avg Reward: 1697.9141416saving best model....\n",
"\n",
"Total T: 1010045 Episode Num: 1858 Reward: 1683.987101 Avg Reward: 1713.688646saving best model....\n",
"\n",
"Total T: 1011045 Episode Num: 1859 Reward: 1899.892905 Avg Reward: 1715.825510saving best model....\n",
"\n",
"Total T: 1012045 Episode Num: 1860 Reward: 1927.730235 Avg Reward: 1716.912025saving best model....\n",
"\n",
"Total T: 1046356 Episode Num: 1896 Reward: 2056.256715 Avg Reward: 1701.355758saving best model....\n",
"\n",
"Total T: 1103394 Episode Num: 1961 Reward: 2142.402567 Avg Reward: 1709.062279saving best model....\n",
"\n",
"Total T: 1104100 Episode Num: 1962 Reward: 1423.517726 Avg Reward: 1720.439349saving best model....\n",
"\n",
"Total T: 1117663 Episode Num: 1976 Reward: 2052.184967 Avg Reward: 1720.660141saving best model....\n",
"\n",
"Total T: 1120663 Episode Num: 1979 Reward: 2000.589526 Avg Reward: 1721.762381saving best model....\n",
"\n",
"Total T: 1121663 Episode Num: 1980 Reward: 2116.956713 Avg Reward: 1724.367843saving best model....\n",
"\n",
"Total T: 1122663 Episode Num: 1981 Reward: 2185.936063 Avg Reward: 1728.441790saving best model....\n",
"\n",
"Total T: 1123663 Episode Num: 1982 Reward: 2116.577776 Avg Reward: 1731.218534saving best model....\n",
"\n",
"Total T: 1124663 Episode Num: 1983 Reward: 2217.289103 Avg Reward: 1735.182137saving best model....\n",
"\n",
"Total T: 1125663 Episode Num: 1984 Reward: 2212.460464 Avg Reward: 1737.764281saving best model....\n",
"\n",
"Total T: 1126663 Episode Num: 1985 Reward: 2094.186390 Avg Reward: 1741.068408saving best model....\n",
"\n",
"Total T: 1131763 Episode Num: 1991 Reward: 719.274058 Avg Reward: 1730.2249504saving best model....\n",
"\n",
"Total T: 1132763 Episode Num: 1992 Reward: 2059.926626 Avg Reward: 1749.475737saving best model....\n",
"\n",
"Total T: 1134763 Episode Num: 1994 Reward: 2005.944271 Avg Reward: 1752.050513saving best model....\n",
"\n",
"Total T: 1135763 Episode Num: 1995 Reward: 2163.437762 Avg Reward: 1755.731738saving best model....\n",
"\n",
"Total T: 1136763 Episode Num: 1996 Reward: 2133.015577 Avg Reward: 1756.499327saving best model....\n",
"\n",
"Total T: 1137763 Episode Num: 1997 Reward: 2098.084008 Avg Reward: 1757.933058saving best model....\n",
"\n",
"Total T: 1138763 Episode Num: 1998 Reward: 2184.859372 Avg Reward: 1776.603584saving best model....\n",
"\n",
"Total T: 1139763 Episode Num: 1999 Reward: 2159.819747 Avg Reward: 1776.975249saving best model....\n",
"\n",
"Total T: 1140763 Episode Num: 2000 Reward: 2084.308219 Avg Reward: 1777.999119saving best model....\n",
"\n",
"Total T: 1141763 Episode Num: 2001 Reward: 2032.529747 Avg Reward: 1778.170707saving best model....\n",
"\n",
"Total T: 1142763 Episode Num: 2002 Reward: 1903.058610 Avg Reward: 1797.093604saving best model....\n",
"\n",
"Total T: 1143763 Episode Num: 2003 Reward: 1981.396048 Avg Reward: 1816.749969saving best model....\n",
"\n",
"Total T: 1144763 Episode Num: 2004 Reward: 2085.454800 Avg Reward: 1818.319394saving best model....\n",
"\n",
"Total T: 1146763 Episode Num: 2006 Reward: 937.491220 Avg Reward: 1823.3627625saving best model....\n",
"\n",
"Total T: 1147763 Episode Num: 2007 Reward: 1985.179108 Avg Reward: 1824.447361saving best model....\n",
"\n",
"Total T: 1148763 Episode Num: 2008 Reward: 1904.669541 Avg Reward: 1826.739413saving best model....\n",
"\n",
"Total T: 1149763 Episode Num: 2009 Reward: 2078.590195 Avg Reward: 1847.364731saving best model....\n",
"\n",
"Total T: 1151763 Episode Num: 2011 Reward: 1824.806017 Avg Reward: 1867.787146saving best model....\n",
"\n",
"Total T: 1152763 Episode Num: 2012 Reward: 2138.167960 Avg Reward: 1871.408806saving best model....\n",
"\n",
"Total T: 1153763 Episode Num: 2013 Reward: 1969.463616 Avg Reward: 1872.331094saving best model....\n",
"\n",
"Total T: 1154763 Episode Num: 2014 Reward: 2154.504187 Avg Reward: 1874.285120saving best model....\n",
"\n",
"Total T: 1155763 Episode Num: 2015 Reward: 2160.355579 Avg Reward: 1876.028189saving best model....\n",
"\n",
"Total T: 1156763 Episode Num: 2016 Reward: 1951.743862 Avg Reward: 1877.050552saving best model....\n",
"\n",
"Total T: 1158763 Episode Num: 2018 Reward: 2044.493289 Avg Reward: 1878.960390saving best model....\n",
"\n",
"Total T: 1159763 Episode Num: 2019 Reward: 2034.834059 Avg Reward: 1880.888255saving best model....\n",
"\n",
"Total T: 1160763 Episode Num: 2020 Reward: 2092.946362 Avg Reward: 1901.671013saving best model....\n",
"\n",
"Total T: 1165320 Episode Num: 2025 Reward: 2005.365287 Avg Reward: 1918.611286saving best model....\n",
"\n",
"Total T: 1166320 Episode Num: 2026 Reward: 1909.152823 Avg Reward: 1926.859454saving best model....\n",
"\n",
"Total T: 1182478 Episode Num: 2043 Reward: 2097.861132 Avg Reward: 1929.429959saving best model....\n",
"\n",
"Total T: 1183478 Episode Num: 2044 Reward: 2255.397897 Avg Reward: 1933.135084saving best model....\n",
"\n",
"Total T: 1184478 Episode Num: 2045 Reward: 2287.414460 Avg Reward: 1938.861168saving best model....\n",
"\n",
"Total T: 1185478 Episode Num: 2046 Reward: 1817.681244 Avg Reward: 1943.034470saving best model....\n",
"\n",
"Total T: 1278653 Episode Num: 2147 Reward: 2095.636704 Avg Reward: 1940.963032saving best model....\n",
"\n",
"Total T: 1280653 Episode Num: 2149 Reward: 1993.272350 Avg Reward: 1952.655975saving best model....\n",
"\n",
"Total T: 1281653 Episode Num: 2150 Reward: 2211.595190 Avg Reward: 1955.909722saving best model....\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total T: 1282653 Episode Num: 2151 Reward: 2278.623270 Avg Reward: 1955.972979saving best model....\n",
"\n",
"Total T: 1283653 Episode Num: 2152 Reward: 2296.058155 Avg Reward: 1958.867957saving best model....\n",
"\n",
"Total T: 1285653 Episode Num: 2154 Reward: 2047.697984 Avg Reward: 1959.030327saving best model....\n",
"\n",
"Total T: 1286653 Episode Num: 2155 Reward: 2173.697953 Avg Reward: 1962.093588saving best model....\n",
"\n",
"Total T: 1287653 Episode Num: 2156 Reward: 2275.344447 Avg Reward: 1965.456756saving best model....\n",
"\n",
"Total T: 1288653 Episode Num: 2157 Reward: 2041.508042 Avg Reward: 1976.215511saving best model....\n",
"\n",
"Total T: 1289653 Episode Num: 2158 Reward: 2285.170873 Avg Reward: 1992.755884saving best model....\n",
"\n",
"Total T: 1290653 Episode Num: 2159 Reward: 2247.382220 Avg Reward: 1992.975531saving best model....\n",
"\n",
"Total T: 1302653 Episode Num: 2171 Reward: 2123.387622 Avg Reward: 2009.603248saving best model....\n",
"\n",
"Total T: 1303653 Episode Num: 2172 Reward: 2033.092230 Avg Reward: 2010.851613saving best model....\n",
"\n",
"Total T: 1306653 Episode Num: 2175 Reward: 2108.636220 Avg Reward: 2014.844603saving best model....\n",
"\n",
"Total T: 1307847 Episode Num: 2177 Reward: 359.314961 Avg Reward: 1997.6389674saving best model....\n",
"\n",
"Total T: 1308847 Episode Num: 2178 Reward: 2090.023908 Avg Reward: 2017.604592saving best model....\n",
"\n",
"Total T: 1309847 Episode Num: 2179 Reward: 2325.507923 Avg Reward: 2022.155436saving best model....\n",
"\n",
"Total T: 1322152 Episode Num: 2192 Reward: 2207.796535 Avg Reward: 2020.090943saving best model....\n",
"\n",
"Total T: 1324152 Episode Num: 2194 Reward: 2077.735638 Avg Reward: 2042.514348saving best model....\n",
"\n",
"Total T: 1326152 Episode Num: 2196 Reward: 2099.490457 Avg Reward: 2042.996111saving best model....\n",
"\n",
"Total T: 1327152 Episode Num: 2197 Reward: 2300.156792 Avg Reward: 2065.613890saving best model....\n",
"\n",
"Total T: 1329152 Episode Num: 2199 Reward: 2208.152247 Avg Reward: 2066.353948saving best model....\n",
"\n",
"Total T: 1330152 Episode Num: 2200 Reward: 2274.157704 Avg Reward: 2069.350461saving best model....\n",
"\n",
"Total T: 1331152 Episode Num: 2201 Reward: 2294.586438 Avg Reward: 2071.562157saving best model....\n",
"\n",
"Total T: 1334152 Episode Num: 2204 Reward: 2239.825934 Avg Reward: 2071.062463saving best model....\n",
"\n",
"Total T: 1335152 Episode Num: 2205 Reward: 2068.378640 Avg Reward: 2091.693624saving best model....\n",
"\n",
"Total T: 1336152 Episode Num: 2206 Reward: 2319.505288 Avg Reward: 2092.168280saving best model....\n",
"\n",
"Total T: 1338152 Episode Num: 2208 Reward: 2168.682514 Avg Reward: 2092.876083saving best model....\n",
"\n",
"Total T: 1339152 Episode Num: 2209 Reward: 2257.336493 Avg Reward: 2093.785281saving best model....\n",
"\n",
"Total T: 1340152 Episode Num: 2210 Reward: 2244.577420 Avg Reward: 2094.723440saving best model....\n",
"\n",
"Total T: 1350933 Episode Num: 2221 Reward: 2414.021788 Avg Reward: 2092.064168saving best model....\n",
"\n",
"Total T: 1397224 Episode Num: 2269 Reward: 2211.787399 Avg Reward: 2108.363821saving best model....\n",
"\n",
"Total T: 1398224 Episode Num: 2270 Reward: 2200.814384 Avg Reward: 2110.563665saving best model....\n",
"\n",
"Total T: 1399224 Episode Num: 2271 Reward: 2141.530119 Avg Reward: 2110.745090saving best model....\n",
"\n",
"Total T: 1400224 Episode Num: 2272 Reward: 2335.074237 Avg Reward: 2113.764910saving best model....\n",
"\n",
"Total T: 1401224 Episode Num: 2273 Reward: 2378.968279 Avg Reward: 2117.731247saving best model....\n",
"\n",
"Total T: 1402224 Episode Num: 2274 Reward: 2213.004554 Avg Reward: 2121.295618saving best model....\n",
"\n",
"Total T: 1403224 Episode Num: 2275 Reward: 2209.527284 Avg Reward: 2122.304529saving best model....\n",
"\n",
"Total T: 1404224 Episode Num: 2276 Reward: 2340.998001 Avg Reward: 2124.045122saving best model....\n",
"\n",
"Total T: 1405224 Episode Num: 2277 Reward: 2243.985206 Avg Reward: 2142.891825saving best model....\n",
"\n",
"Total T: 1408224 Episode Num: 2280 Reward: 2272.415727 Avg Reward: 2143.057264saving best model....\n",
"\n",
"Total T: 1411224 Episode Num: 2283 Reward: 2249.070127 Avg Reward: 2156.231997saving best model....\n",
"\n",
"Total T: 1412224 Episode Num: 2284 Reward: 2227.564290 Avg Reward: 2157.487644saving best model....\n",
"\n",
"Total T: 1413224 Episode Num: 2285 Reward: 2336.607954 Avg Reward: 2159.781280saving best model....\n",
"\n",
"Total T: 1415224 Episode Num: 2287 Reward: 2169.868690 Avg Reward: 2161.012190saving best model....\n",
"\n",
"Total T: 1418224 Episode Num: 2290 Reward: 2234.837062 Avg Reward: 2161.669349saving best model....\n",
"\n",
"Total T: 1421224 Episode Num: 2293 Reward: 2197.163960 Avg Reward: 2161.569966saving best model....\n",
"\n",
"Total T: 1422224 Episode Num: 2294 Reward: 2303.901425 Avg Reward: 2163.831624saving best model....\n",
"\n",
"Total T: 1423224 Episode Num: 2295 Reward: 2392.457169 Avg Reward: 2164.828817saving best model....\n",
"\n",
"Total T: 1425224 Episode Num: 2297 Reward: 2252.686888 Avg Reward: 2166.499965saving best model....\n",
"\n",
"Total T: 1426224 Episode Num: 2298 Reward: 2310.881405 Avg Reward: 2168.036706saving best model....\n",
"\n",
"Total T: 1430224 Episode Num: 2302 Reward: 2116.694561 Avg Reward: 2164.771133saving best model....\n",
"\n",
"Total T: 1440224 Episode Num: 2312 Reward: 2278.558496 Avg Reward: 2166.895669saving best model....\n",
"\n",
"Total T: 1443224 Episode Num: 2315 Reward: 2254.536775 Avg Reward: 2168.831017saving best model....\n",
"\n",
"Total T: 1444224 Episode Num: 2316 Reward: 2188.512419 Avg Reward: 2175.198731saving best model....\n",
"\n",
"Total T: 1475526 Episode Num: 2348 Reward: 2135.145530 Avg Reward: 2168.894627saving best model....\n",
"\n",
"Total T: 1476526 Episode Num: 2349 Reward: 2216.933613 Avg Reward: 2187.327830saving best model....\n",
"\n",
"Total T: 1478526 Episode Num: 2351 Reward: 2248.355618 Avg Reward: 2187.890770saving best model....\n",
"\n",
"Total T: 1544526 Episode Num: 2417 Reward: 2195.301625 Avg Reward: 2179.272170saving best model....\n",
"\n",
"Total T: 1587526 Episode Num: 2460 Reward: 2193.923542 Avg Reward: 2196.188334saving best model....\n",
"\n",
"Total T: 1588526 Episode Num: 2461 Reward: 2358.079283 Avg Reward: 2199.106330saving best model....\n",
"\n",
"Total T: 1589526 Episode Num: 2462 Reward: 2178.848581 Avg Reward: 2200.069872saving best model....\n",
"\n",
"Total T: 1590526 Episode Num: 2463 Reward: 2230.661274 Avg Reward: 2200.488631saving best model....\n",
"\n",
"Total T: 1591526 Episode Num: 2464 Reward: 2388.481468 Avg Reward: 2203.955786saving best model....\n",
"\n",
"Total T: 1592526 Episode Num: 2465 Reward: 2330.158211 Avg Reward: 2206.252784saving best model....\n",
"\n",
"Total T: 1593526 Episode Num: 2466 Reward: 2160.839371 Avg Reward: 2206.343569saving best model....\n",
"\n",
"Total T: 1595526 Episode Num: 2468 Reward: 2111.398864 Avg Reward: 2207.923683saving best model....\n",
"\n",
"Total T: 1597526 Episode Num: 2470 Reward: 2264.217299 Avg Reward: 2208.946414saving best model....\n",
"\n",
"Total T: 1621526 Episode Num: 2494 Reward: 2209.593422 Avg Reward: 2208.974320saving best model....\n",
"\n",
"Total T: 1622526 Episode Num: 2495 Reward: 2299.560293 Avg Reward: 2210.572843saving best model....\n",
"\n",
"Total T: 1623526 Episode Num: 2496 Reward: 2296.624656 Avg Reward: 2211.139742saving best model....\n",
"\n",
"Total T: 1625526 Episode Num: 2498 Reward: 2161.657298 Avg Reward: 2211.158486saving best model....\n",
"\n",
"Total T: 1626526 Episode Num: 2499 Reward: 2285.763795 Avg Reward: 2213.280079saving best model....\n",
"\n",
"Total T: 1627526 Episode Num: 2500 Reward: 2226.273782 Avg Reward: 2213.505020saving best model....\n",
"\n",
"Total T: 1630526 Episode Num: 2503 Reward: 2265.035197 Avg Reward: 2214.648240saving best model....\n",
"\n",
"Total T: 1653512 Episode Num: 2526 Reward: 2290.807955 Avg Reward: 2213.546957saving best model....\n",
"\n",
"Total T: 1656512 Episode Num: 2529 Reward: 2309.028109 Avg Reward: 2214.949771saving best model....\n",
"\n",
"Total T: 1657512 Episode Num: 2530 Reward: 2148.466407 Avg Reward: 2215.514334saving best model....\n",
"\n",
"Total T: 1663512 Episode Num: 2536 Reward: 2355.611254 Avg Reward: 2214.986010saving best model....\n",
"\n",
"Total T: 1668512 Episode Num: 2541 Reward: 2204.236205 Avg Reward: 2219.975941saving best model....\n",
"\n",
"Total T: 1669512 Episode Num: 2542 Reward: 2392.129118 Avg Reward: 2221.728860saving best model....\n",
"\n",
"Total T: 1672512 Episode Num: 2545 Reward: 2367.130317 Avg Reward: 2222.072612saving best model....\n",
"\n",
"Total T: 1673512 Episode Num: 2546 Reward: 2350.432181 Avg Reward: 2223.383427saving best model....\n",
"\n",
"Total T: 1676512 Episode Num: 2549 Reward: 2175.408641 Avg Reward: 2223.509407saving best model....\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total T: 1677512 Episode Num: 2550 Reward: 2414.115895 Avg Reward: 2226.005899saving best model....\n",
"\n",
"Total T: 1678512 Episode Num: 2551 Reward: 2270.894867 Avg Reward: 2226.330132saving best model....\n",
"\n",
"Total T: 1783544 Episode Num: 2657 Reward: 2153.318392 Avg Reward: 2223.700933saving best model....\n",
"\n",
"Total T: 1784544 Episode Num: 2658 Reward: 2277.066450 Avg Reward: 2226.860937saving best model....\n",
"\n",
"Total T: 1785544 Episode Num: 2659 Reward: 2122.291268 Avg Reward: 2227.330161saving best model....\n",
"\n",
"Total T: 1788544 Episode Num: 2662 Reward: 2321.189522 Avg Reward: 2228.843355saving best model....\n",
"\n",
"Total T: 1800544 Episode Num: 2674 Reward: 2328.916106 Avg Reward: 2230.222128saving best model....\n",
"\n",
"Total T: 1801544 Episode Num: 2675 Reward: 2402.046611 Avg Reward: 2232.663871saving best model....\n",
"\n",
"Total T: 1819544 Episode Num: 2693 Reward: 2171.016039 Avg Reward: 2231.720215saving best model....\n",
"\n",
"Total T: 1821544 Episode Num: 2695 Reward: 2184.071621 Avg Reward: 2234.673290saving best model....\n",
"\n",
"Total T: 1827544 Episode Num: 2701 Reward: 2302.145385 Avg Reward: 2237.676467saving best model....\n",
"\n",
"Total T: 1829544 Episode Num: 2703 Reward: 2294.172838 Avg Reward: 2260.373372saving best model....\n",
"\n",
"Total T: 1830544 Episode Num: 2704 Reward: 2348.437319 Avg Reward: 2261.358574saving best model....\n",
"\n",
"Total T: 2025716 Episode Num: 2901 Reward: 2369.566094 Avg Reward: 2261.377187saving best model....\n",
"\n",
"Total T: 2026716 Episode Num: 2902 Reward: 2363.776229 Avg Reward: 2262.683288saving best model....\n",
"\n",
"Total T: 2027716 Episode Num: 2903 Reward: 2364.403532 Avg Reward: 2265.713936saving best model....\n",
"\n",
"Total T: 2028716 Episode Num: 2904 Reward: 2348.782221 Avg Reward: 2267.035678saving best model....\n",
"\n",
"Total T: 2029716 Episode Num: 2905 Reward: 2213.790423 Avg Reward: 2267.046273saving best model....\n",
"\n",
"Total T: 2030716 Episode Num: 2906 Reward: 2324.238609 Avg Reward: 2270.910208saving best model....\n",
"\n",
"Total T: 2031716 Episode Num: 2907 Reward: 2362.605007 Avg Reward: 2271.792372saving best model....\n",
"\n",
"Total T: 2032716 Episode Num: 2908 Reward: 2193.545262 Avg Reward: 2271.973718saving best model....\n",
"\n",
"Total T: 2033716 Episode Num: 2909 Reward: 2323.651273 Avg Reward: 2273.225299saving best model....\n",
"\n",
"Total T: 2039716 Episode Num: 2915 Reward: 2261.672338 Avg Reward: 2274.125151saving best model....\n",
"\n",
"Total T: 2041716 Episode Num: 2917 Reward: 2292.134488 Avg Reward: 2275.297822saving best model....\n",
"\n",
"Total T: 2042716 Episode Num: 2918 Reward: 2436.106488 Avg Reward: 2277.587426saving best model....\n",
"\n",
"Total T: 2047716 Episode Num: 2923 Reward: 2153.633101 Avg Reward: 2276.644183saving best model....\n",
"\n",
"Total T: 2048716 Episode Num: 2924 Reward: 2309.164043 Avg Reward: 2279.764625saving best model....\n",
"\n",
"Total T: 2055716 Episode Num: 2931 Reward: 2213.810087 Avg Reward: 2280.792951saving best model....\n",
"\n",
"Total T: 2056716 Episode Num: 2932 Reward: 2382.842279 Avg Reward: 2281.437055saving best model....\n",
"\n",
"Total T: 2057716 Episode Num: 2933 Reward: 2402.747680 Avg Reward: 2282.742137saving best model....\n",
"\n",
"Total T: 2058716 Episode Num: 2934 Reward: 2358.533544 Avg Reward: 2282.782894saving best model....\n",
"\n",
"Total T: 2082716 Episode Num: 2958 Reward: 2387.610250 Avg Reward: 2278.462805saving best model....\n",
"\n",
"Total T: 2083716 Episode Num: 2959 Reward: 2292.447475 Avg Reward: 2292.888698saving best model....\n",
"\n",
"Total T: 2084716 Episode Num: 2960 Reward: 2360.503764 Avg Reward: 2296.212227saving best model....\n",
"\n",
"Total T: 2095716 Episode Num: 2971 Reward: 2237.132254 Avg Reward: 2296.529476saving best model....\n",
"\n",
"Total T: 2096716 Episode Num: 2972 Reward: 2377.278959 Avg Reward: 2298.955639saving best model....\n",
"\n",
"Total T: 2098716 Episode Num: 2974 Reward: 2403.046292 Avg Reward: 2300.043672saving best model....\n",
"\n",
"Total T: 2099716 Episode Num: 2975 Reward: 2318.957888 Avg Reward: 2300.170343saving best model....\n",
"\n",
"Total T: 2100716 Episode Num: 2976 Reward: 2357.341800 Avg Reward: 2301.205319saving best model....\n",
"\n",
"Total T: 2101716 Episode Num: 2977 Reward: 2372.902845 Avg Reward: 2301.532729saving best model....\n",
"\n",
"Total T: 2104716 Episode Num: 2980 Reward: 2278.577205 Avg Reward: 2304.297573saving best model....\n",
"\n",
"Total T: 2105716 Episode Num: 2981 Reward: 2184.000451 Avg Reward: 2304.493286saving best model....\n",
"\n",
"Total T: 2147716 Episode Num: 3023 Reward: 2376.687748 Avg Reward: 2305.032192saving best model....\n",
"\n",
"Total T: 2148716 Episode Num: 3024 Reward: 2470.236271 Avg Reward: 2306.642914saving best model....\n",
"\n",
"Total T: 2149716 Episode Num: 3025 Reward: 2359.257020 Avg Reward: 2306.993727saving best model....\n",
"\n",
"Total T: 2150716 Episode Num: 3026 Reward: 2428.683904 Avg Reward: 2308.047295saving best model....\n",
"\n",
"Total T: 2154716 Episode Num: 3030 Reward: 2267.005528 Avg Reward: 2308.245183saving best model....\n",
"\n",
"Total T: 2187639 Episode Num: 3063 Reward: 2348.746874 Avg Reward: 2308.798742saving best model....\n",
"\n",
"Total T: 2188639 Episode Num: 3064 Reward: 2337.511754 Avg Reward: 2309.338858saving best model....\n",
"\n",
"Total T: 2189639 Episode Num: 3065 Reward: 2426.054401 Avg Reward: 2310.703234saving best model....\n",
"\n",
"Total T: 2190639 Episode Num: 3066 Reward: 2367.000867 Avg Reward: 2310.856375saving best model....\n",
"\n",
"Total T: 2191639 Episode Num: 3067 Reward: 2350.909388 Avg Reward: 2312.384510saving best model....\n",
"\n",
"Total T: 2192639 Episode Num: 3068 Reward: 2287.857333 Avg Reward: 2314.160484saving best model....\n",
"\n",
"Total T: 2194639 Episode Num: 3070 Reward: 2321.311670 Avg Reward: 2313.860515saving best model....\n",
"\n",
"Total T: 2210639 Episode Num: 3086 Reward: 2279.576818 Avg Reward: 2314.663742saving best model....\n",
"\n",
"Total T: 2211639 Episode Num: 3087 Reward: 2341.102102 Avg Reward: 2315.220682saving best model....\n",
"\n",
"Total T: 2216639 Episode Num: 3092 Reward: 2358.046487 Avg Reward: 2317.725334saving best model....\n",
"\n",
"Total T: 2217639 Episode Num: 3093 Reward: 2446.511075 Avg Reward: 2321.803850saving best model....\n",
"\n",
"Total T: 2218639 Episode Num: 3094 Reward: 2446.729687 Avg Reward: 2322.997402saving best model....\n",
"\n",
"Total T: 2476436 Episode Num: 3355 Reward: 2410.728740 Avg Reward: 2311.515216saving best model....\n",
"\n",
"Total T: 2477436 Episode Num: 3356 Reward: 2406.955269 Avg Reward: 2332.188476saving best model....\n",
"\n",
"Total T: 2478436 Episode Num: 3357 Reward: 2298.238307 Avg Reward: 2332.584360saving best model....\n",
"\n",
"Total T: 2480436 Episode Num: 3359 Reward: 2275.610972 Avg Reward: 2333.441170saving best model....\n",
"\n",
"Total T: 2481436 Episode Num: 3360 Reward: 2467.116348 Avg Reward: 2336.598461saving best model....\n",
"\n",
"Total T: 2482436 Episode Num: 3361 Reward: 2305.970150 Avg Reward: 2336.633027saving best model....\n",
"\n",
"Total T: 2484436 Episode Num: 3363 Reward: 2419.310456 Avg Reward: 2337.525082saving best model....\n",
"\n",
"Total T: 2485436 Episode Num: 3364 Reward: 2449.301079 Avg Reward: 2338.158351saving best model....\n",
"\n",
"Total T: 2486436 Episode Num: 3365 Reward: 2358.240732 Avg Reward: 2338.705752saving best model....\n",
"\n",
"Total T: 2488436 Episode Num: 3367 Reward: 2288.187367 Avg Reward: 2338.836166saving best model....\n",
"\n",
"Total T: 2489436 Episode Num: 3368 Reward: 2270.053563 Avg Reward: 2339.710343saving best model....\n",
"\n",
"Total T: 2490436 Episode Num: 3369 Reward: 2449.349531 Avg Reward: 2340.713924saving best model....\n",
"\n",
"Total T: 2492436 Episode Num: 3371 Reward: 2265.966478 Avg Reward: 2341.214860saving best model....\n",
"\n",
"Total T: 2493436 Episode Num: 3372 Reward: 2402.822523 Avg Reward: 2343.941677saving best model....\n",
"\n",
"Total T: 2494436 Episode Num: 3373 Reward: 2352.244790 Avg Reward: 2343.958599saving best model....\n",
"\n",
"Total T: 2495436 Episode Num: 3374 Reward: 2323.948352 Avg Reward: 2344.010485saving best model....\n",
"\n",
"Total T: 2496436 Episode Num: 3375 Reward: 2336.004591 Avg Reward: 2347.144307saving best model....\n",
"\n",
"Total T: 2674225 Episode Num: 3557 Reward: 2465.881392 Avg Reward: 2347.406816saving best model....\n",
"\n",
"Total T: 2675225 Episode Num: 3558 Reward: 2502.126712 Avg Reward: 2349.995398saving best model....\n",
"\n",
"Total T: 2676225 Episode Num: 3559 Reward: 2414.227466 Avg Reward: 2350.832502saving best model....\n",
"\n",
"Total T: 2687225 Episode Num: 3570 Reward: 2354.839472 Avg Reward: 2349.973442saving best model....\n",
"\n",
"Total T: 2696225 Episode Num: 3579 Reward: 2414.913937 Avg Reward: 2351.065531saving best model....\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total T: 2697225 Episode Num: 3580 Reward: 2511.032046 Avg Reward: 2352.228172saving best model....\n",
"\n",
"Total T: 2809146 Episode Num: 3693 Reward: 2350.420103 Avg Reward: 2352.268838saving best model....\n",
"\n",
"Total T: 2810146 Episode Num: 3694 Reward: 2332.138665 Avg Reward: 2353.183528saving best model....\n",
"\n",
"Total T: 2811146 Episode Num: 3695 Reward: 2569.125164 Avg Reward: 2378.745147saving best model....\n",
"\n",
"Total T: 2812146 Episode Num: 3696 Reward: 2508.268060 Avg Reward: 2379.971951saving best model....\n",
"\n",
"Total T: 2813146 Episode Num: 3697 Reward: 2337.819026 Avg Reward: 2380.401157saving best model....\n",
"\n",
"Total T: 2814146 Episode Num: 3698 Reward: 2342.367980 Avg Reward: 2383.982854saving best model....\n",
"\n",
"Total T: 2815146 Episode Num: 3699 Reward: 2383.442357 Avg Reward: 2384.515742saving best model....\n",
"\n",
"Total T: 2816146 Episode Num: 3700 Reward: 2445.194353 Avg Reward: 2386.066220saving best model....\n",
"\n",
"Total T: 2817146 Episode Num: 3701 Reward: 2515.765400 Avg Reward: 2386.134727saving best model....\n",
"\n",
"Total T: 2830001 Episode Num: 3714 Reward: 2377.537848 Avg Reward: 2385.589093saving best model....\n",
"\n",
"Total T: 2831001 Episode Num: 3715 Reward: 2417.871830 Avg Reward: 2386.842204saving best model....\n",
"\n",
"Total T: 2833001 Episode Num: 3717 Reward: 2371.062311 Avg Reward: 2386.159154saving best model....\n",
"\n",
"Total T: 2836001 Episode Num: 3720 Reward: 2374.172877 Avg Reward: 2386.188476saving best model....\n",
"\n",
"Total T: 2844001 Episode Num: 3728 Reward: 2444.492765 Avg Reward: 2389.378069saving best model....\n",
"\n",
"Total T: 2845001 Episode Num: 3729 Reward: 2492.229906 Avg Reward: 2390.442268saving best model....\n",
"\n",
"Total T: 2846001 Episode Num: 3730 Reward: 2477.708993 Avg Reward: 2391.766433saving best model....\n",
"\n",
"Total T: 2850001 Episode Num: 3734 Reward: 2405.913545 Avg Reward: 2390.968404saving best model....\n",
"\n",
"Total T: 2860001 Episode Num: 3744 Reward: 2410.475301 Avg Reward: 2392.011909saving best model....\n",
"\n",
"Total T: 2862001 Episode Num: 3746 Reward: 2377.329223 Avg Reward: 2392.187182saving best model....\n",
"\n",
"Total T: 2864001 Episode Num: 3748 Reward: 2314.104632 Avg Reward: 2392.661296saving best model....\n",
"\n",
"Total T: 3076353 Episode Num: 3962 Reward: 2323.262045 Avg Reward: 2390.699533saving best model....\n",
"\n",
"Total T: 3094353 Episode Num: 3980 Reward: 2488.968907 Avg Reward: 2394.299379saving best model....\n",
"\n",
"Total T: 3312909 Episode Num: 4202 Reward: 2527.219390 Avg Reward: 2409.750232saving best model....\n",
"\n",
"Total T: 3313909 Episode Num: 4203 Reward: 2592.028576 Avg Reward: 2411.850027saving best model....\n",
"\n",
"Total T: 3314909 Episode Num: 4204 Reward: 2605.530976 Avg Reward: 2414.106672saving best model....\n",
"\n",
"Total T: 3315909 Episode Num: 4205 Reward: 2450.283907 Avg Reward: 2415.225172saving best model....\n",
"\n",
"Total T: 3316909 Episode Num: 4206 Reward: 2525.773282 Avg Reward: 2417.527727saving best model....\n",
"\n",
"Total T: 3321909 Episode Num: 4211 Reward: 2467.181438 Avg Reward: 2417.629422saving best model....\n",
"\n",
"Total T: 3328909 Episode Num: 4218 Reward: 2519.647356 Avg Reward: 2417.049697saving best model....\n",
"\n",
"Total T: 3330909 Episode Num: 4220 Reward: 2410.076400 Avg Reward: 2418.464940saving best model....\n",
"\n",
"Total T: 3331909 Episode Num: 4221 Reward: 2469.473985 Avg Reward: 2419.348305saving best model....\n",
"\n",
"Total T: 3332909 Episode Num: 4222 Reward: 2377.365073 Avg Reward: 2419.429706saving best model....\n",
"\n",
"Total T: 3339909 Episode Num: 4229 Reward: 2420.187748 Avg Reward: 2418.927405saving best model....\n",
"\n",
"Total T: 3340909 Episode Num: 4230 Reward: 2524.823406 Avg Reward: 2420.541890saving best model....\n",
"\n",
"Total T: 3341909 Episode Num: 4231 Reward: 2476.022348 Avg Reward: 2436.932421saving best model....\n",
"\n",
"Total T: 3342909 Episode Num: 4232 Reward: 2463.654157 Avg Reward: 2437.393380saving best model....\n",
"\n",
"Total T: 3343909 Episode Num: 4233 Reward: 2502.636857 Avg Reward: 2438.092138saving best model....\n",
"\n",
"Total T: 3344909 Episode Num: 4234 Reward: 2454.096131 Avg Reward: 2439.323447saving best model....\n",
"\n",
"Total T: 3345909 Episode Num: 4235 Reward: 2424.717197 Avg Reward: 2439.533469saving best model....\n",
"\n",
"Total T: 3347909 Episode Num: 4237 Reward: 2373.956316 Avg Reward: 2440.099359saving best model....\n",
"\n",
"Total T: 3348909 Episode Num: 4238 Reward: 2481.649648 Avg Reward: 2441.930308saving best model....\n",
"\n",
"Total T: 3349909 Episode Num: 4239 Reward: 2584.348264 Avg Reward: 2445.140738saving best model....\n",
"\n",
"Total T: 3354909 Episode Num: 4244 Reward: 2660.881966 Avg Reward: 2446.301478saving best model....\n",
"\n",
"Total T: 3360909 Episode Num: 4250 Reward: 2454.512823 Avg Reward: 2445.720651saving best model....\n",
"\n",
"Total T: 3362909 Episode Num: 4252 Reward: 2411.838660 Avg Reward: 2448.713267saving best model....\n",
"\n",
"Total T: 3363909 Episode Num: 4253 Reward: 2452.242794 Avg Reward: 2448.866561saving best model....\n",
"\n",
"Total T: 3368909 Episode Num: 4258 Reward: 2460.688496 Avg Reward: 2450.267813saving best model....\n",
"\n",
"Total T: 3369909 Episode Num: 4259 Reward: 2470.514952 Avg Reward: 2450.655349saving best model....\n",
"\n",
"Total T: 3371909 Episode Num: 4261 Reward: 2386.097083 Avg Reward: 2450.657544saving best model....\n",
"\n",
"Total T: 3373909 Episode Num: 4263 Reward: 2267.674041 Avg Reward: 2451.951235saving best model....\n",
"\n",
"Total T: 3374909 Episode Num: 4264 Reward: 2515.055709 Avg Reward: 2452.628128saving best model....\n",
"\n",
"Total T: 3375909 Episode Num: 4265 Reward: 2495.144286 Avg Reward: 2454.344502saving best model....\n",
"\n",
"Total T: 3381909 Episode Num: 4271 Reward: 2421.444522 Avg Reward: 2453.799039saving best model....\n",
"\n",
"Total T: 3382909 Episode Num: 4272 Reward: 2544.297140 Avg Reward: 2455.171976saving best model....\n",
"\n",
"Total T: 3383909 Episode Num: 4273 Reward: 2478.669311 Avg Reward: 2455.343207saving best model....\n",
"\n",
"Total T: 3384909 Episode Num: 4274 Reward: 2490.346491 Avg Reward: 2455.716744saving best model....\n",
"\n",
"Total T: 3385909 Episode Num: 4275 Reward: 2497.383338 Avg Reward: 2456.180096saving best model....\n",
"\n",
"Total T: 3387909 Episode Num: 4277 Reward: 2483.654232 Avg Reward: 2455.857844saving best model....\n",
"\n",
"Total T: 3390909 Episode Num: 4280 Reward: 2477.536472 Avg Reward: 2456.111898saving best model....\n",
"\n",
"Total T: 3391909 Episode Num: 4281 Reward: 2546.949060 Avg Reward: 2457.675940saving best model....\n",
"\n",
"Total T: 3392909 Episode Num: 4282 Reward: 2472.740467 Avg Reward: 2458.071198saving best model....\n",
"\n",
"Total T: 3393909 Episode Num: 4283 Reward: 2516.822066 Avg Reward: 2459.050470saving best model....\n",
"\n",
"Total T: 3394909 Episode Num: 4284 Reward: 2443.323380 Avg Reward: 2468.763484saving best model....\n",
"\n",
"Total T: 3395909 Episode Num: 4285 Reward: 2499.832183 Avg Reward: 2469.092760saving best model....\n",
"\n",
"Total T: 3399909 Episode Num: 4289 Reward: 2538.692761 Avg Reward: 2469.399040saving best model....\n",
"\n",
"Total T: 3556781 Episode Num: 4450 Reward: 2621.512585 Avg Reward: 2467.690498saving best model....\n",
"\n",
"Total T: 3557781 Episode Num: 4451 Reward: 2510.322269 Avg Reward: 2480.559453saving best model....\n",
"\n",
"Total T: 3558781 Episode Num: 4452 Reward: 2489.918402 Avg Reward: 2481.447998saving best model....\n",
"\n",
"Total T: 3559781 Episode Num: 4453 Reward: 2477.918112 Avg Reward: 2481.966846saving best model....\n",
"\n",
"Total T: 3563781 Episode Num: 4457 Reward: 2490.343949 Avg Reward: 2482.528808saving best model....\n",
"\n",
"Total T: 3565781 Episode Num: 4459 Reward: 2478.245739 Avg Reward: 2484.580858saving best model....\n",
"\n",
"Total T: 3566781 Episode Num: 4460 Reward: 2536.853393 Avg Reward: 2487.341995saving best model....\n",
"\n",
"Total T: 3609781 Episode Num: 4503 Reward: 2544.833473 Avg Reward: 2487.081777saving best model....\n",
"\n",
"Total T: 3610781 Episode Num: 4504 Reward: 2571.084328 Avg Reward: 2487.524957saving best model....\n",
"\n",
"Total T: 3615781 Episode Num: 4509 Reward: 2507.152480 Avg Reward: 2487.021592saving best model....\n",
"\n",
"Total T: 3616781 Episode Num: 4510 Reward: 2615.299517 Avg Reward: 2488.992194saving best model....\n",
"\n",
"Total T: 3617781 Episode Num: 4511 Reward: 2526.698008 Avg Reward: 2489.636547saving best model....\n",
"\n",
"Total T: 3618781 Episode Num: 4512 Reward: 2420.578944 Avg Reward: 2489.725465saving best model....\n",
"\n",
"Total T: 3803299 Episode Num: 4699 Reward: 2460.935178 Avg Reward: 2488.170857saving best model....\n",
"\n",
"Total T: 3804299 Episode Num: 4700 Reward: 2616.896801 Avg Reward: 2501.919947saving best model....\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total T: 3805299 Episode Num: 4701 Reward: 2544.888870 Avg Reward: 2503.012489saving best model....\n",
"\n",
"Total T: 3806299 Episode Num: 4702 Reward: 2533.532089 Avg Reward: 2503.795806saving best model....\n",
"\n",
"Total T: 3807299 Episode Num: 4703 Reward: 2515.213482 Avg Reward: 2504.706304saving best model....\n",
"\n",
"Total T: 3808299 Episode Num: 4704 Reward: 2579.330991 Avg Reward: 2513.139760saving best model....\n",
"\n",
"Total T: 3811299 Episode Num: 4707 Reward: 2521.731264 Avg Reward: 2513.070881saving best model....\n",
"\n",
"Total T: 3812299 Episode Num: 4708 Reward: 2515.768538 Avg Reward: 2513.271868saving best model....\n",
"\n",
"Total T: 3815299 Episode Num: 4711 Reward: 2473.403224 Avg Reward: 2513.075645saving best model....\n",
"\n",
"Total T: 3819299 Episode Num: 4715 Reward: 2517.097845 Avg Reward: 2515.097030saving best model....\n",
"\n",
"Total T: 3820299 Episode Num: 4716 Reward: 2605.361263 Avg Reward: 2515.898618saving best model....\n",
"\n",
"Total T: 3821299 Episode Num: 4717 Reward: 2652.036548 Avg Reward: 2517.519978saving best model....\n",
"\n",
"Total T: 3822299 Episode Num: 4718 Reward: 2495.833785 Avg Reward: 2518.069069saving best model....\n",
"\n",
"Total T: 3832299 Episode Num: 4728 Reward: 2540.134428 Avg Reward: 2518.583238saving best model....\n",
"\n",
"Total T: 3833299 Episode Num: 4729 Reward: 2553.186390 Avg Reward: 2519.228081saving best model....\n",
"\n",
"Total T: 3837299 Episode Num: 4733 Reward: 2476.722248 Avg Reward: 2520.302369saving best model....\n",
"\n",
"Total T: 3838299 Episode Num: 4734 Reward: 2607.329411 Avg Reward: 2521.875432saving best model....\n",
"\n",
"Total T: 3844299 Episode Num: 4740 Reward: 2477.433662 Avg Reward: 2521.491254saving best model....\n",
"\n",
"Total T: 3845299 Episode Num: 4741 Reward: 2580.186531 Avg Reward: 2522.550975saving best model....\n",
"\n",
"Total T: 3945544 Episode Num: 4842 Reward: 2528.581087 Avg Reward: 2503.001519saving best model....\n",
"\n",
"Total T: 3947544 Episode Num: 4844 Reward: 2431.939322 Avg Reward: 2523.192406saving best model....\n",
"\n",
"Total T: 3948544 Episode Num: 4845 Reward: 2716.584696 Avg Reward: 2526.685884saving best model....\n",
"\n",
"Total T: 3949544 Episode Num: 4846 Reward: 2636.490789 Avg Reward: 2528.312800saving best model....\n",
"\n",
"Total T: 3950544 Episode Num: 4847 Reward: 2627.301597 Avg Reward: 2529.201942saving best model....\n",
"\n",
"Total T: 3952544 Episode Num: 4849 Reward: 2491.674385 Avg Reward: 2529.649269saving best model....\n",
"\n",
"Total T: 3953544 Episode Num: 4850 Reward: 2589.780944 Avg Reward: 2530.516178saving best model....\n",
"\n",
"Total T: 3964544 Episode Num: 4861 Reward: 2484.815300 Avg Reward: 2529.510955saving best model....\n",
"\n",
"Total T: 3965544 Episode Num: 4862 Reward: 2634.469138 Avg Reward: 2531.959503saving best model....\n",
"\n",
"Total T: 3968544 Episode Num: 4865 Reward: 2487.920686 Avg Reward: 2531.520343saving best model....\n",
"\n",
"Total T: 3970544 Episode Num: 4867 Reward: 2525.026694 Avg Reward: 2531.394949saving best model....\n",
"\n",
"Total T: 3971544 Episode Num: 4868 Reward: 2702.666353 Avg Reward: 2534.430254saving best model....\n",
"\n",
"Total T: 3972544 Episode Num: 4869 Reward: 2593.952477 Avg Reward: 2535.193934saving best model....\n",
"\n",
"Total T: 3973544 Episode Num: 4870 Reward: 2535.141773 Avg Reward: 2535.655316saving best model....\n",
"\n",
"Total T: 3974544 Episode Num: 4871 Reward: 2569.018526 Avg Reward: 2536.476554saving best model....\n",
"\n",
"Total T: 4040544 Episode Num: 4937 Reward: 2506.281276 Avg Reward: 2536.142316saving best model....\n",
"\n",
"Total T: 4143340 Episode Num: 5041 Reward: 2570.353100 Avg Reward: 2536.643663saving best model....\n",
"\n",
"Total T: 4147340 Episode Num: 5045 Reward: 2619.707146 Avg Reward: 2538.319509saving best model....\n",
"\n",
"Total T: 4149340 Episode Num: 5047 Reward: 2489.976830 Avg Reward: 2538.440159saving best model....\n",
"\n",
"Total T: 4150340 Episode Num: 5048 Reward: 2596.150038 Avg Reward: 2539.269417saving best model....\n",
"\n",
"Total T: 4151340 Episode Num: 5049 Reward: 2508.139598 Avg Reward: 2539.481933saving best model....\n",
"\n",
"Total T: 4152340 Episode Num: 5050 Reward: 2656.446812 Avg Reward: 2541.108077saving best model....\n",
"\n",
"Total T: 4153340 Episode Num: 5051 Reward: 2653.111281 Avg Reward: 2544.066128saving best model....\n",
"\n",
"Total T: 4154340 Episode Num: 5052 Reward: 2619.972178 Avg Reward: 2544.687510saving best model....\n",
"\n",
"Total T: 4155340 Episode Num: 5053 Reward: 2633.571978 Avg Reward: 2544.925633saving best model....\n",
"\n",
"Total T: 4156340 Episode Num: 5054 Reward: 2591.273245 Avg Reward: 2545.098056saving best model....\n",
"\n",
"Total T: 4157340 Episode Num: 5055 Reward: 2511.098302 Avg Reward: 2545.606603saving best model....\n",
"\n",
"Total T: 4158340 Episode Num: 5056 Reward: 2563.140150 Avg Reward: 2546.569701saving best model....\n",
"\n",
"Total T: 4174340 Episode Num: 5072 Reward: 2419.913965 Avg Reward: 2545.040836saving best model....\n",
"\n",
"Total T: 4999141 Episode Num: 6480 Reward: 343.330499 Avg Reward: 2446.4885350"
]
}
],
"source": [
"# Train agent\n",
"train(policy, env)"
]
},
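{
"cell_type": "markdown",
"metadata": {},
"source": [
"The log above prints `Total T`, `Episode Num`, `Reward` and `Avg Reward` each time a new best model is saved. The next cell is an optional sketch (not part of the original training code) showing how a few such lines could be parsed and plotted; the inline `sample_log` string is a hypothetical stand-in for captured output, and matplotlib is assumed to be available."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional sketch: parse \"Total T\" and \"Avg Reward\" from a few logged lines and plot them.\n",
"# Not part of the original TD3 code; sample_log is a hypothetical stand-in for captured output.\n",
"import re\n",
"import matplotlib.pyplot as plt\n",
"\n",
"sample_log = \"\"\"\n",
"Total T: 438578 Episode Num: 1179 Reward: 797.859466 Avg Reward: 684.013201\n",
"Total T: 553005 Episode Num: 1299 Reward: 1090.911131 Avg Reward: 772.431448\n",
"Total T: 958102 Episode Num: 1800 Reward: 1969.281886 Avg Reward: 1523.843636\n",
"Total T: 3804299 Episode Num: 4700 Reward: 2616.896801 Avg Reward: 2501.919947\n",
"\"\"\"\n",
"\n",
"pattern = re.compile(r\"Total T: (\\d+) .* Avg Reward: ([\\d.]+)\")\n",
"steps, avg_rewards = [], []\n",
"for line in sample_log.strip().splitlines():\n",
"    match = pattern.search(line)\n",
"    if match:\n",
"        steps.append(int(match.group(1)))\n",
"        avg_rewards.append(float(match.group(2)))\n",
"\n",
"plt.plot(steps, avg_rewards)\n",
"plt.xlabel('Total timesteps')\n",
"plt.ylabel('Average reward')\n",
"plt.title('TD3 training progress (parsed from log)')\n",
"plt.show()"
]
},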
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"policy.load()\n",
"\n",
"for i in range(100):\n",
" evaluate_policy(policy, env, render=True)"
]
},
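{
"cell_type": "markdown",
"metadata": {},
"source": [
"`evaluate_policy` is defined earlier in the notebook. For reference, the cell below is a minimal sketch of what such an evaluation rollout commonly looks like; it assumes the agent exposes a `select_action(state)` method (as in the standard TD3 reference implementation) and the classic gym API where `env.step` returns `(obs, reward, done, info)`. It is not the notebook's own definition."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch of a deterministic evaluation rollout.\n",
"# Assumes policy.select_action(state) exists; this is NOT the notebook's evaluate_policy definition.\n",
"import numpy as np\n",
"\n",
"def evaluate_policy_sketch(policy, env, eval_episodes=10, render=False):\n",
"    avg_reward = 0.\n",
"    for _ in range(eval_episodes):\n",
"        obs = env.reset()\n",
"        done = False\n",
"        while not done:\n",
"            if render:\n",
"                env.render()\n",
"            # Act greedily (no exploration noise) during evaluation\n",
"            action = policy.select_action(np.array(obs))\n",
"            obs, reward, done, _ = env.step(action)\n",
"            avg_reward += reward\n",
"    avg_reward /= eval_episodes\n",
"    print(\"Evaluation over %d episodes: %f\" % (eval_episodes, avg_reward))\n",
"    return avg_reward"
]
},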
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"env.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}