@Ronnasayd
Created October 13, 2020 20:50
Algorithms and implementations for various neural networks
import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory
ENV_NAME = 'CartPole-v0'
# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n
# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())
# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=0.01,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.load_weights('dqn_CartPole-v0_weights.h5f')
dqn.test(env, nb_episodes=15, visualize=True, verbose=2)
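The script above loads a pre-trained weights file (dqn_CartPole-v0_weights.h5f). A minimal sketch of how such a file could be produced with keras-rl, reusing the env, model, memory, and policy objects defined above (nb_steps=50000 is an illustrative choice, mirroring the MountainCar notebook later in this gist):

# Sketch: train the agent and save its weights for later loading with dqn.load_weights().
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)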
import gym
import time
import numpy as np
import random
env = gym.make('FrozenLake8x8-v0')  # select the environment
Q = np.ones(
    (env.observation_space.n, env.action_space.n), dtype=np.float32
)  # build the Q table where rows are the possible states and columns are the actions
epsilon = 1
epsilon_decay = 0.9995
epsilon_min = 0.01
alpha = 1  # initial learning rate
alpha_decay = 0.9995
alpha_min = 0.01  # minimum learning rate
gamma = 0.9  # discount factor for rewards
MAX_NUMBER_STEPS = 10000
steps = 0
eta = max(alpha_min, alpha)
eps = max(epsilon_min, epsilon)
total_reward = 0
state = env.reset()  # initialize the environment
while steps < MAX_NUMBER_STEPS:
    if random.uniform(0, 1) < eps:  # if a random number is less than epsilon
        action = env.action_space.sample()  # choose a random action (exploration)
    else:
        action = np.argmax(Q[state, ])  # choose the action with the highest estimated return for this state (exploitation)
    new_state, reward, done, info = env.step(action)  # take the action and receive a new observation from the environment
    total_reward += reward
    if done:
        target = reward
    else:
        target = reward + gamma * np.max(Q[new_state, ])
    Q[state, action] = (1 - eta) * Q[state, action] + eta * target  # update the Q table with the reward received for the action
    state = new_state  # update the state
    if done:
        print(f'steps: {steps} total_reward: {total_reward} alpha: {eta:.4f} epsilon: {eps:.4f}')
        total_reward = 0
        state = env.reset()  # reset the environment
    steps += 1
    alpha = alpha * alpha_decay
    eta = max(alpha_min, alpha)  # decay the learning rate
    epsilon = epsilon * epsilon_decay
    eps = max(epsilon_min, epsilon)
print("Testing...")
# test after training
done = False
state = env.reset()
total_reward = 0
while not done:
    env.render()
    action = np.argmax(Q[state, ])
    state, reward, done, info = env.step(action)
    print(reward)
    time.sleep(1 / 24)
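The line Q[state, action] = (1 - eta) * Q[state, action] + eta * target is the standard tabular Q-learning update Q(s, a) ← (1 − α)·Q(s, a) + α·(r + γ·max_a' Q(s', a')). A minimal evaluation sketch, assuming the Q table and env trained above, that runs the greedy policy for several episodes without rendering to estimate the success rate (n_episodes = 100 is an illustrative choice):

# Sketch: greedy evaluation of the learned Q table over many episodes.
n_episodes = 100
successes = 0
for _ in range(n_episodes):
    state = env.reset()
    done = False
    while not done:
        action = np.argmax(Q[state, ])  # always exploit during evaluation
        state, reward, done, info = env.step(action)
    successes += reward  # FrozenLake returns reward 1 only when the goal is reached
print(f'success rate: {successes / n_episodes:.2f}')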
import gym
import time
import numpy as np
import random
env = gym.make('FrozenLake-v0')  # select the environment
Q = np.ones(
    (env.observation_space.n, env.action_space.n), dtype=np.float32
)  # build the Q table where rows are the possible states and columns are the actions
epsilon = 1
epsilon_decay = 0.99
epsilon_min = 0.01
alpha = 1  # initial learning rate
alpha_decay = 0.999
alpha_min = 0.01  # minimum learning rate
gamma = 0.9999  # discount factor for rewards
MAX_NUMBER_STEPS = 10000
steps = 0
eta = max(alpha_min, alpha)
eps = max(epsilon_min, epsilon)
total_reward = 0
state = env.reset()  # initialize the environment
while steps < MAX_NUMBER_STEPS:
    if random.uniform(0, 1) < eps:  # if a random number is less than epsilon
        action = env.action_space.sample()  # choose a random action (exploration)
    else:
        action = np.argmax(Q[state, ])  # choose the action with the highest estimated return for this state (exploitation)
    new_state, reward, done, info = env.step(action)  # take the action and receive a new observation from the environment
    total_reward += reward
    if done:
        target = reward
    else:
        target = reward + gamma * np.max(Q[new_state, ])
    Q[state, action] = (1 - eta) * Q[state, action] + eta * target  # update the Q table with the reward received for the action
    state = new_state  # update the state
    if done:
        print(f'total_reward: {total_reward} steps: {steps} alpha: {eta} epsilon: {eps}')
        total_reward = 0
        state = env.reset()  # reset the environment
    steps += 1
    alpha = alpha * alpha_decay
    eta = max(alpha_min, alpha)  # decay the learning rate
    epsilon = epsilon * epsilon_decay
    eps = max(epsilon_min, epsilon)
print("Testing...")
# test after training
done = False
state = env.reset()
total_reward = 0
while not done:
    env.render()
    action = np.argmax(Q[state, ])
    state, reward, done, info = env.step(action)
    print(reward)
    time.sleep(1 / 24)
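The two Q-learning scripts above differ only in the environment name and the decay hyperparameters. A sketch of how the shared loop could be factored into one function; the name train_q_table, its defaults, and the illustrative calls at the end are hypothetical and not part of the original gist:

import gym
import numpy as np
import random


def train_q_table(env_name, epsilon_decay, alpha_decay, gamma,
                  max_steps=10000, epsilon_min=0.01, alpha_min=0.01):
    # Hypothetical helper: the same epsilon-greedy tabular Q-learning loop as above,
    # parameterized so both FrozenLake variants can reuse it.
    env = gym.make(env_name)
    Q = np.ones((env.observation_space.n, env.action_space.n), dtype=np.float32)
    epsilon, alpha = 1.0, 1.0
    state = env.reset()
    for _ in range(max_steps):
        eps = max(epsilon_min, epsilon)
        eta = max(alpha_min, alpha)
        if random.uniform(0, 1) < eps:
            action = env.action_space.sample()  # exploration
        else:
            action = np.argmax(Q[state, ])  # exploitation
        new_state, reward, done, info = env.step(action)
        target = reward if done else reward + gamma * np.max(Q[new_state, ])
        Q[state, action] = (1 - eta) * Q[state, action] + eta * target
        state = env.reset() if done else new_state
        epsilon *= epsilon_decay
        alpha *= alpha_decay
    return Q


# Illustrative calls matching the hyperparameters of the two scripts above:
Q_8x8 = train_q_table('FrozenLake8x8-v0', 0.9995, 0.9995, 0.9)
Q_4x4 = train_q_table('FrozenLake-v0', 0.99, 0.999, 0.9999)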
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "keras_rl_mountaincar_dqn_example.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "1xKHjbyyLSLi",
"colab_type": "code",
"outputId": "fe6b7d47-aac2-4784-9c43-d9627ed06c16",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
}
},
"source": [
"%tensorflow_version 1.x\n",
"! pip install tensorflow==1.14\n",
"! pip install keras-rl\n",
"! pip install h5py"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Collecting tensorflow==1.14\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/de/f0/96fb2e0412ae9692dbf400e5b04432885f677ad6241c088ccc5fe7724d69/tensorflow-1.14.0-cp36-cp36m-manylinux1_x86_64.whl (109.2MB)\n",
"\u001b[K |████████████████████████████████| 109.2MB 97kB/s \n",
"\u001b[?25hRequirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.14) (0.33.6)\n",
"Requirement already satisfied: gast>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.14) (0.2.2)\n",
"Collecting tensorboard<1.15.0,>=1.14.0\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/91/2d/2ed263449a078cd9c8a9ba50ebd50123adf1f8cfbea1492f9084169b89d9/tensorboard-1.14.0-py3-none-any.whl (3.1MB)\n",
"\u001b[K |████████████████████████████████| 3.2MB 33.8MB/s \n",
"\u001b[?25hRequirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.14) (0.8.1)\n",
"Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.14) (1.15.0)\n",
"Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.14) (1.1.0)\n",
"Requirement already satisfied: numpy<2.0,>=1.14.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.14) (1.17.5)\n",
"Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.14) (1.12.0)\n",
"Collecting tensorflow-estimator<1.15.0rc0,>=1.14.0rc0\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/3c/d5/21860a5b11caf0678fbc8319341b0ae21a07156911132e0e71bffed0510d/tensorflow_estimator-1.14.0-py2.py3-none-any.whl (488kB)\n",
"\u001b[K |████████████████████████████████| 491kB 39.7MB/s \n",
"\u001b[?25hRequirement already satisfied: google-pasta>=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.14) (0.1.8)\n",
"Requirement already satisfied: keras-applications>=1.0.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.14) (1.0.8)\n",
"Requirement already satisfied: protobuf>=3.6.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.14) (3.10.0)\n",
"Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.14) (0.9.0)\n",
"Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.14) (1.1.0)\n",
"Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.14) (1.11.2)\n",
"Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.15.0,>=1.14.0->tensorflow==1.14) (3.1.1)\n",
"Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.15.0,>=1.14.0->tensorflow==1.14) (42.0.2)\n",
"Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.15.0,>=1.14.0->tensorflow==1.14) (0.16.0)\n",
"Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras-applications>=1.0.6->tensorflow==1.14) (2.8.0)\n",
"Installing collected packages: tensorboard, tensorflow-estimator, tensorflow\n",
" Found existing installation: tensorboard 1.15.0\n",
" Uninstalling tensorboard-1.15.0:\n",
" Successfully uninstalled tensorboard-1.15.0\n",
" Found existing installation: tensorflow-estimator 1.15.1\n",
" Uninstalling tensorflow-estimator-1.15.1:\n",
" Successfully uninstalled tensorflow-estimator-1.15.1\n",
" Found existing installation: tensorflow 1.15.0\n",
" Uninstalling tensorflow-1.15.0:\n",
" Successfully uninstalled tensorflow-1.15.0\n",
"Successfully installed tensorboard-1.14.0 tensorflow-1.14.0 tensorflow-estimator-1.14.0\n",
"Collecting keras-rl\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/ab/87/4b57eff8e4bd834cea0a75cd6c58198c9e42be29b600db9c14fafa72ec07/keras-rl-0.4.2.tar.gz (40kB)\n",
"\u001b[K |████████████████████████████████| 40kB 2.8MB/s \n",
"\u001b[?25hRequirement already satisfied: keras>=2.0.7 in /usr/local/lib/python3.6/dist-packages (from keras-rl) (2.2.5)\n",
"Requirement already satisfied: keras-applications>=1.0.8 in /usr/local/lib/python3.6/dist-packages (from keras>=2.0.7->keras-rl) (1.0.8)\n",
"Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.6/dist-packages (from keras>=2.0.7->keras-rl) (1.12.0)\n",
"Requirement already satisfied: keras-preprocessing>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from keras>=2.0.7->keras-rl) (1.1.0)\n",
"Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras>=2.0.7->keras-rl) (2.8.0)\n",
"Requirement already satisfied: numpy>=1.9.1 in /usr/local/lib/python3.6/dist-packages (from keras>=2.0.7->keras-rl) (1.17.5)\n",
"Requirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.6/dist-packages (from keras>=2.0.7->keras-rl) (1.4.1)\n",
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from keras>=2.0.7->keras-rl) (3.13)\n",
"Building wheels for collected packages: keras-rl\n",
" Building wheel for keras-rl (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for keras-rl: filename=keras_rl-0.4.2-cp36-none-any.whl size=48379 sha256=90524452b9bfd6deb6d6240251ea5739c97ba6efb36087eefb436f2059558d6e\n",
" Stored in directory: /root/.cache/pip/wheels/7d/4d/84/9254c9f2e8f51865cb0dac8e79da85330c735551d31f73c894\n",
"Successfully built keras-rl\n",
"Installing collected packages: keras-rl\n",
"Successfully installed keras-rl-0.4.2\n",
"Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (2.8.0)\n",
"Requirement already satisfied: numpy>=1.7 in /usr/local/lib/python3.6/dist-packages (from h5py) (1.17.5)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from h5py) (1.12.0)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "6YyJKUdwLlUA",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 462
},
"outputId": "2e6fc84a-34d7-452e-b441-c549403f55e0"
},
"source": [
"import numpy as np\n",
"import gym\n",
"\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense, Activation, Flatten\n",
"from keras.optimizers import Adam\n",
"\n",
"from rl.agents.dqn import DQNAgent\n",
"from rl.policy import EpsGreedyQPolicy\n",
"from rl.memory import SequentialMemory"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n",
"/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
"/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
"/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
"/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
"/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
"/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n",
"/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
"/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
"/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
"/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
"/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
"/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Gih5PP3eLsVm",
"colab_type": "code",
"colab": {}
},
"source": [
"ENV_NAME = 'MountainCar-v0'\n",
"\n",
"\n",
"# Get the environment and extract the number of actions.\n",
"env = gym.make(ENV_NAME)\n",
"np.random.seed(123)\n",
"env.seed(123)\n",
"nb_actions = env.action_space.n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "cfRQ7-j8LuiP",
"colab_type": "code",
"outputId": "2e452de5-348d-41a7-87f0-9e809bfb224c",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 598
}
},
"source": [
"\n",
"# Next, we build a very simple model.\n",
"model = Sequential()\n",
"model.add(Flatten(input_shape=(1,) + env.observation_space.shape))\n",
"model.add(Dense(16))\n",
"model.add(Activation('relu'))\n",
"model.add(Dense(16))\n",
"model.add(Activation('relu'))\n",
"model.add(Dense(16))\n",
"model.add(Activation('relu'))\n",
"model.add(Dense(nb_actions))\n",
"model.add(Activation('linear'))\n",
"print(model.summary())"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:66: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.\n",
"\n",
"WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:541: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n",
"\n",
"WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4432: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.\n",
"\n",
"Model: \"sequential_1\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"flatten_1 (Flatten) (None, 2) 0 \n",
"_________________________________________________________________\n",
"dense_1 (Dense) (None, 16) 48 \n",
"_________________________________________________________________\n",
"activation_1 (Activation) (None, 16) 0 \n",
"_________________________________________________________________\n",
"dense_2 (Dense) (None, 16) 272 \n",
"_________________________________________________________________\n",
"activation_2 (Activation) (None, 16) 0 \n",
"_________________________________________________________________\n",
"dense_3 (Dense) (None, 16) 272 \n",
"_________________________________________________________________\n",
"activation_3 (Activation) (None, 16) 0 \n",
"_________________________________________________________________\n",
"dense_4 (Dense) (None, 3) 51 \n",
"_________________________________________________________________\n",
"activation_4 (Activation) (None, 3) 0 \n",
"=================================================================\n",
"Total params: 643\n",
"Trainable params: 643\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n",
"None\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "pTeDxvrhL2Q0",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 139
},
"outputId": "da476695-79d0-4bae-b737-8e29aae1a3a7"
},
"source": [
"# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and\n",
"# even the metrics!\n",
"memory = SequentialMemory(limit=50000, window_length=1)\n",
"policy = EpsGreedyQPolicy()\n",
"dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,target_model_update=0.01, policy=policy)\n",
"dqn.compile(Adam(lr=1e-3), metrics=['mae'])"
],
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:190: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.\n",
"\n",
"WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:197: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.\n",
"\n",
"WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:793: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n",
"\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "P2dQE4m7Yisl",
"colab_type": "code",
"outputId": "15298657-62cf-43c8-b1d2-a07c6efe53d1",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 428
}
},
"source": [
"# Okay, now it's time to learn something! We visualize the training here for show, but this\n",
"# slows down training quite a lot. You can always safely abort the training prematurely using\n",
"# Ctrl + C.\n",
"dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)"
],
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"text": [
"Training for 50000 steps ...\n",
"Interval 1 (0 steps performed)\n",
" 1/10000 [..............................] - ETA: 8:06 - reward: -1.0000"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/rl/memory.py:39: UserWarning: Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!\n",
" warnings.warn('Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!')\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"10000/10000 [==============================] - 34s 3ms/step - reward: -1.0000\n",
"50 episodes - episode_reward: -196.520 [-200.000, -128.000] - loss: 2.426 - mean_absolute_error: 19.995 - mean_q: -29.657\n",
"\n",
"Interval 2 (10000 steps performed)\n",
"10000/10000 [==============================] - 32s 3ms/step - reward: -1.0000\n",
"54 episodes - episode_reward: -185.222 [-200.000, -115.000] - loss: 6.155 - mean_absolute_error: 37.769 - mean_q: -56.012\n",
"\n",
"Interval 3 (20000 steps performed)\n",
"10000/10000 [==============================] - 31s 3ms/step - reward: -1.0000\n",
"57 episodes - episode_reward: -177.807 [-200.000, -100.000] - loss: 6.252 - mean_absolute_error: 38.103 - mean_q: -56.204\n",
"\n",
"Interval 4 (30000 steps performed)\n",
"10000/10000 [==============================] - 32s 3ms/step - reward: -1.0000\n",
"60 episodes - episode_reward: -164.333 [-200.000, -104.000] - loss: 3.864 - mean_absolute_error: 34.893 - mean_q: -51.453\n",
"\n",
"Interval 5 (40000 steps performed)\n",
"10000/10000 [==============================] - 32s 3ms/step - reward: -1.0000\n",
"done, took 160.995 seconds\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7facdc9e84a8>"
]
},
"metadata": {
"tags": []
},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "mmykvkN7aQIy",
"colab_type": "code",
"colab": {}
},
"source": [
"# After training is done, we save the final weights.\n",
"dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)\n",
"from google.colab import files\n",
"files.download('dqn_{}_weights.h5f'.format(ENV_NAME)) "
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "r_xyZdh_ZSlK",
"colab_type": "code",
"outputId": "705f44c0-dbe2-4b54-9625-6b244dc9ef8a",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 136
}
},
"source": [
"# Finally, evaluate our algorithm for 5 episodes.\n",
"dqn.test(env, nb_episodes=5, visualize=False)"
],
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"text": [
"Testing for 5 episodes ...\n",
"Episode 1: reward: -111.000, steps: 111\n",
"Episode 2: reward: -96.000, steps: 96\n",
"Episode 3: reward: -101.000, steps: 101\n",
"Episode 4: reward: -89.000, steps: 89\n",
"Episode 5: reward: -88.000, steps: 88\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7facbe6bd908>"
]
},
"metadata": {
"tags": []
},
"execution_count": 8
}
]
}
]
}
import numpy as np
import gym
import time
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory
ENV_NAME = 'MountainCar-v0'
# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n
# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())
# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=0.01,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.load_weights('dqn_MountainCar-v0_weights.h5f')
dqn.test(env, nb_episodes=15, visualize=True)
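visualize=True opens a render window, which is not available in a headless session such as the Colab notebook included in this gist. A minimal sketch, assuming an older gym release where gym.wrappers.Monitor is still available, that records the test episodes to video files instead (the './videos' directory is an illustrative choice):

# Sketch: record test episodes to disk instead of rendering to the screen.
from gym import wrappers
monitored_env = wrappers.Monitor(env, './videos', force=True)
dqn.test(monitored_env, nb_episodes=15, visualize=False)
monitored_env.close()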