{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
},
"colab": {
"name": "OpenAI Gym - Classic Control BM.ipynb",
"provenance": [],
"include_colab_link": true
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/analyticsindiamagazine/d9d4d7e10c653aadb45f61a8e1b2efab/openai-gym-classic-control-bm.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "PlJhNeqXlZxw",
"colab_type": "code",
"colab": {}
},
"source": [
"import gym\n",
"\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"plt.style.use('fivethirtyeight')\n",
"\n",
"import pandas as pd\n",
"import imageio\n",
"import time\n",
"import numpy as np\n",
"import gym\n",
"from stable_baselines.common.vec_env import DummyVecEnv, VecVideoRecorder, SubprocVecEnv\n",
"from stable_baselines.ddpg.policies import CnnPolicy, MlpPolicy\n",
"from stable_baselines.common.policies import MlpLstmPolicy, CnnLstmPolicy, MlpPolicy\n",
"from stable_baselines import A2C, PPO2, SAC, TD3, TRPO, DDPG, ACER, ACKTR, SAC\n",
"from stable_baselines.common.evaluation import evaluate_policy\n",
"from stable_baselines.common import set_global_seeds\n",
"\n",
"from stable_baselines.bench import Monitor\n",
"from stable_baselines.results_plotter import load_results, ts2xy"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "1IFI9-VWlZx5",
"colab_type": "code",
"colab": {},
"outputId": "11ab22f5-9ee7-4ba4-b850-64bd7fd58ccc"
},
"source": [
"gym.__version__"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'0.15.3'"
]
},
"metadata": {
"tags": []
},
"execution_count": 2
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "HnakU1vdlZx9",
"colab_type": "code",
"colab": {}
},
"source": [
"def make_env(env_id, rank, seed=0):\n",
" \"\"\"\n",
" Utility function for multiprocessed env.\n",
" \n",
" :param env_id: (str) the environment ID\n",
" :param num_env: (int) the number of environment you wish to have in subprocesses\n",
" :param seed: (int) the inital seed for RNG\n",
" :param rank: (int) index of the subprocess\n",
" \"\"\"\n",
" def _init():\n",
" env = gym.make(env_id)\n",
" env.seed(seed + rank)\n",
" return env\n",
" set_global_seeds(seed)\n",
" return _init\n",
"\n",
"\n",
"\n",
"def evaluate(model, num_steps=1000):\n",
" \"\"\"\n",
" Evaluate a RL agent\n",
" :param model: (BaseRLModel object) the RL Agent\n",
" :param num_steps: (int) number of timesteps to evaluate it\n",
" :return: (float) Mean reward\n",
" \"\"\"\n",
" \n",
" episode_rewards = [[0.0] for _ in range(env.num_envs)]\n",
" obs = env.reset()\n",
" for i in range(num_steps):\n",
" # _states are only useful when using LSTM policies\n",
" actions, _states = model.predict(obs)\n",
" # here, action, rewards and dones are arrays\n",
" # because we are using vectorized env\n",
" obs, rewards, dones, info = env.step(actions)\n",
"\n",
" # Stats\n",
" for i in range(env.num_envs):\n",
" episode_rewards[i][-1] += rewards[i]\n",
" if dones[i]:\n",
" episode_rewards[i].append(0.0)\n",
"\n",
" mean_rewards = [0.0 for _ in range(env.num_envs)]\n",
" n_episodes = 0\n",
" for i in range(env.num_envs):\n",
" mean_rewards[i] = np.mean(episode_rewards[i]) \n",
" n_episodes += len(episode_rewards[i]) \n",
"\n",
" # Compute mean reward\n",
" mean_reward = round(np.mean(mean_rewards), 1)\n",
" print(\"Mean reward:\", mean_reward, \"Num episodes:\", n_episodes)\n",
"\n",
" return mean_reward"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "PDW8hNq_lZyA",
"colab_type": "code",
"colab": {}
},
"source": [
"# Create a CNN based policy and optimize it using PPO2.\n",
"# ppo_params = {\"gamma\" : 0.99,\n",
"# \"n_steps\" : 128,\n",
"# \"ent_coef\" : 0.01,\n",
"# \"learning_rate\" : 0.00025,\n",
"# \"vf_coef\" : 0.5,\n",
"# \"max_grad_norm\" : 0.5,\n",
"# \"lam\" : 0.95,\n",
"# \"nminibatches\" : 4,\n",
"# \"noptepochs\" : 4,\n",
"# \"cliprange\" :0.2,\n",
"# \"cliprange_vf\" : None,\n",
"# \"verbose\" : 1,\n",
"# \"tensorboard_log\" : None,\n",
"# \"_init_setup_model\" : True,\n",
"# \"policy_kwargs\" : None,\n",
"# \"full_tensorboard_log\" : False,\n",
"# \"seed\" : None,\n",
"# \"n_cpu_tf_sess\" : None\n",
"# }\n",
"\n",
"# params=ppo_params, "
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "reBxig4blZyD",
"colab_type": "code",
"colab": {}
},
"source": [
"def train_save_agent(model, env, gif_name, time_steps=int(1e4), \n",
" save_gif=False):\n",
" #use the policy, environment and define params to compile the PPO2 model..\n",
"# model = model #PPO2(\"MlpPolicy\", env)#, **params)\n",
"\n",
" s_time = time.time()\n",
" #Train the model\n",
" model.learn(total_timesteps=time_steps)\n",
" e_time = time.time()\n",
" tot_time = e_time - s_time\n",
" print(f\"Total Run-Time : , {tot_time : 0.3f} seconds\")\n",
"\n",
" if save_gif:\n",
" ########### Record-GIF ###########\n",
" images = []\n",
" obs = model.env.reset()\n",
" img = model.env.render(mode='rgb_array')\n",
" gif_length = 500\n",
"\n",
" for i in range(gif_length):\n",
" images.append(img)\n",
" action, _ = model.predict(obs)\n",
" obs, _, _ ,_ = model.env.step(action)\n",
" img = model.env.render(mode='rgb_array')\n",
"\n",
" imageio.mimsave(f'{gif_name}-{timesteps}.gif', [np.array(img) for i, img in enumerate(images) if i%2 == 0],\n",
" fps=29)\n",
" \n",
" return model, tot_time"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"scrolled": true,
"id": "z87sphMklZyG",
"colab_type": "code",
"colab": {},
"outputId": "f5fbb6d8-eb0f-4caf-89f6-b8c14ade38ba"
},
"source": [
"all_algs = [\"A2C\", \"PPO2\", \"ACER\", \"ACKTR\"]\n",
"\n",
"# Create log dir\n",
"import os\n",
"\n",
"log_dir = \"/tmp/gym/\"\n",
"os.makedirs(log_dir, exist_ok=True)\n",
"env_list = [\"Pendulum-v0\", \"MountainCar-v0\", \"Acrobot-v1\", \"CartPole-v1\"]\n",
"timesteps = int(1e6)\n",
"num_cpu = 4 # Number of processes to use\n",
"\n",
"\n",
"game_df = pd.DataFrame()\n",
"\n",
"for env_id in env_list:\n",
" print(f\"{env_id}......\")\n",
"# env = gym.make(env_id) #\n",
" # Logs will be saved in log_dir/monitor.csv\n",
"# env = Monitor(env, log_dir, allow_early_resets=True)\n",
"# env = DummyVecEnv([lambda: gym.make(env_id)])\n",
" env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])\n",
" \n",
" alg_detail_df = pd.DataFrame()\n",
" for alg in all_algs:\n",
" if env_id == \"Pendulum-v0\" and alg == 'ACER':\n",
" print(f\"{env_id, alg}\")\n",
" pass\n",
" \n",
" else:\n",
" print(f'{alg}.....') \n",
" model = eval(alg + \"('MlpPolicy', env)\")\n",
" tr_model, run_time = train_save_agent(model, env, alg, time_steps=timesteps, save_gif=False)\n",
" # mean_reward, std_reward = evaluate_policy(tr_model, tr_model.get_env(), n_eval_episodes=20)\n",
" mean_reward = evaluate(tr_model, num_steps=1000)\n",
"\n",
" alg_detail_df = alg_detail_df.append([[env_id, alg, run_time, mean_reward]]) #, std_reward]])\n",
"\n",
" print(f\"Mean Reward : {mean_reward} \") #\"| Std_Reward : {std_reward}\")\n",
" \n",
" game_df = game_df.append(alg_detail_df) #], axis=1)\n",
" \n",
"game_df.columns = ['Envir', 'Algorithm', 'Run_Time', 'Mean_Rewards'] #, 'Std_Rewards'] "
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Pendulum-v0......\n",
"A2C.....\n",
"Total Run-Time : , 257.970 seconds\n",
"Mean reward: -551.4 Num episodes: 24\n",
"Mean Reward : -551.4 \n",
"PPO2.....\n",
"Total Run-Time : , 338.824 seconds\n",
"Mean reward: -939.9 Num episodes: 24\n",
"Mean Reward : -939.9 \n",
"('Pendulum-v0', 'ACER')\n",
"ACKTR.....\n",
"Total Run-Time : , 242.154 seconds\n",
"Mean reward: -876.8 Num episodes: 24\n",
"Mean Reward : -876.8 \n",
"MountainCar-v0......\n",
"A2C.....\n",
"Total Run-Time : , 237.251 seconds\n",
"Mean reward: -166.7 Num episodes: 24\n",
"Mean Reward : -166.7 \n",
"PPO2.....\n",
"Total Run-Time : , 690.426 seconds\n",
"Mean reward: -166.7 Num episodes: 24\n",
"Mean Reward : -166.7 \n",
"ACER.....\n",
"Total Run-Time : , 346.463 seconds\n",
"Mean reward: -166.7 Num episodes: 24\n",
"Mean Reward : -166.7 \n",
"ACKTR.....\n",
"Total Run-Time : , 234.560 seconds\n",
"Mean reward: -166.7 Num episodes: 24\n",
"Mean Reward : -166.7 \n",
"Acrobot-v1......\n",
"A2C.....\n",
"Total Run-Time : , 290.163 seconds\n",
"Mean reward: -89.6 Num episodes: 46\n",
"Mean Reward : -89.6 \n",
"PPO2.....\n",
"Total Run-Time : , 273.282 seconds\n",
"Mean reward: -74.9 Num episodes: 53\n",
"Mean Reward : -74.9 \n",
"ACER.....\n",
"Total Run-Time : , 392.469 seconds\n",
"Mean reward: -77.8 Num episodes: 51\n",
"Mean Reward : -77.8 \n",
"ACKTR.....\n",
"Total Run-Time : , 253.496 seconds\n",
"Mean reward: -72.3 Num episodes: 55\n",
"Mean Reward : -72.3 \n",
"CartPole-v1......\n",
"A2C.....\n",
"Total Run-Time : , 215.154 seconds\n",
"Mean reward: 333.3 Num episodes: 12\n",
"Mean Reward : 333.3 \n",
"PPO2.....\n",
"Total Run-Time : , 189.328 seconds\n",
"Mean reward: 333.3 Num episodes: 12\n",
"Mean Reward : 333.3 \n",
"ACER.....\n",
"Total Run-Time : , 300.396 seconds\n",
"Mean reward: 333.3 Num episodes: 12\n",
"Mean Reward : 333.3 \n",
"ACKTR.....\n",
"Total Run-Time : , 189.950 seconds\n",
"Mean reward: 333.3 Num episodes: 12\n",
"Mean Reward : 333.3 \n"
],
"name": "stdout"
}
]
},
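{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sketch (not part of the original run) of how `game_df` could be visualized with the matplotlib setup imported above: a grouped bar chart of training run-times per algorithm and environment. Column names follow the `game_df.columns` assignment in the previous cell."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Hypothetical visualization sketch: pivot run-times so each environment is a row\n",
"# and each algorithm a column, then draw a grouped bar chart.\n",
"runtime_pivot = game_df.pivot(index='Envir', columns='Algorithm', values='Run_Time')\n",
"ax = runtime_pivot.plot(kind='bar', figsize=(10, 6))\n",
"ax.set_ylabel('Training run-time (seconds)')\n",
"ax.set_title('Run-time per algorithm and environment')\n",
"plt.tight_layout()\n",
"plt.show()"
],
"execution_count": 0,
"outputs": []
},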
{
"cell_type": "code",
"metadata": {
"id": "cBh97yTBlZyJ",
"colab_type": "code",
"colab": {}
},
"source": [
"# game_df.to_csv('Runtime_details.csv', index=False) "
],
"execution_count": 0,
"outputs": []
}
]
}