Created October 13, 2020 20:50
Algorithms and implementations for various neural networks
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

ENV_NAME = 'CartPole-v0'

# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Configure and compile the agent. Any built-in Keras optimizer and metrics can be used.
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=0.01,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Load previously trained weights and run evaluation episodes with rendering.
dqn.load_weights('dqn_CartPole-v0_weights.h5f')
dqn.test(env, nb_episodes=15, visualize=True, verbose=2)
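This script only evaluates: it loads dqn_CartPole-v0_weights.h5f, so that file must already exist. A minimal sketch of how it can be produced, mirroring the training and saving cells of the Colab notebook further down in this gist:

# Train the compiled agent and save its weights (run once, before the evaluation above).
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)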
import gym
import time
import numpy as np
import random

env = gym.make('FrozenLake8x8-v0')  # select the environment

# Build the Q-table: rows are the possible states, columns are the actions.
Q = np.ones((env.observation_space.n, env.action_space.n), dtype=np.float32)

epsilon = 1  # initial exploration rate
epsilon_decay = 0.9995
epsilon_min = 0.01
alpha = 1  # initial learning rate
alpha_decay = 0.9995
alpha_min = 0.01  # minimum learning rate
gamma = 0.9  # discount factor for rewards
MAX_NUMBER_STEPS = 10000

steps = 0
eta = max(alpha_min, alpha)
eps = max(epsilon_min, epsilon)
total_reward = 0
state = env.reset()  # initialize the environment

while steps < MAX_NUMBER_STEPS:
    if random.uniform(0, 1) < eps:
        # Exploration: choose a random action.
        action = env.action_space.sample()
    else:
        # Exploitation: choose the action with the highest estimated return for this state.
        action = np.argmax(Q[state])
    # Execute the action and receive a new observation from the environment.
    new_state, reward, done, info = env.step(action)
    total_reward += reward
    if done:
        target = reward
    else:
        target = reward + gamma * np.max(Q[new_state])
    # Update the Q-table with the reward received for the action.
    Q[state, action] = (1 - eta) * Q[state, action] + eta * target
    state = new_state  # update the state
    if done:
        print(f'steps: {steps} total_reward: {total_reward} alpha: {eta:.4f} epsilon: {eps:.4f}')
        total_reward = 0
        state = env.reset()  # reinitialize the environment
    steps += 1
    # Decay the learning rate and the exploration rate.
    alpha = alpha * alpha_decay
    eta = max(alpha_min, alpha)
    epsilon = epsilon * epsilon_decay
    eps = max(epsilon_min, epsilon)

print("Testing...")
# Test after training: follow the greedy policy and render each step.
done = False
state = env.reset()
total_reward = 0
while not done:
    env.render()
    action = np.argmax(Q[state])
    state, reward, done, info = env.step(action)
    print(reward)
    time.sleep(1 / 24)
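The Q-table update in the training loop above is the standard tabular Q-learning rule. Written out, with \alpha the (decayed) learning rate eta and \gamma the discount factor:

Q(s, a) \leftarrow (1 - \alpha)\, Q(s, a) + \alpha \big( r + \gamma \max_{a'} Q(s', a') \big)

On terminal transitions the bootstrap term is dropped and the target is just r, exactly as the if done branch does.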
import gym
import time
import numpy as np
import random

env = gym.make('FrozenLake-v0')  # select the environment

# Build the Q-table: rows are the possible states, columns are the actions.
Q = np.ones((env.observation_space.n, env.action_space.n), dtype=np.float32)

epsilon = 1  # initial exploration rate
epsilon_decay = 0.99
epsilon_min = 0.01
alpha = 1  # initial learning rate
alpha_decay = 0.999
alpha_min = 0.01  # minimum learning rate
gamma = 0.9999  # discount factor for rewards
MAX_NUMBER_STEPS = 10000

steps = 0
eta = max(alpha_min, alpha)
eps = max(epsilon_min, epsilon)
total_reward = 0
state = env.reset()  # initialize the environment

while steps < MAX_NUMBER_STEPS:
    if random.uniform(0, 1) < eps:
        # Exploration: choose a random action.
        action = env.action_space.sample()
    else:
        # Exploitation: choose the action with the highest estimated return for this state.
        action = np.argmax(Q[state])
    # Execute the action and receive a new observation from the environment.
    new_state, reward, done, info = env.step(action)
    total_reward += reward
    if done:
        target = reward
    else:
        target = reward + gamma * np.max(Q[new_state])
    # Update the Q-table with the reward received for the action.
    Q[state, action] = (1 - eta) * Q[state, action] + eta * target
    state = new_state  # update the state
    if done:
        print(f'total_reward: {total_reward} steps: {steps} alpha: {eta:.4f} epsilon: {eps:.4f}')
        total_reward = 0
        state = env.reset()  # reinitialize the environment
    steps += 1
    # Decay the learning rate and the exploration rate.
    alpha = alpha * alpha_decay
    eta = max(alpha_min, alpha)
    epsilon = epsilon * epsilon_decay
    eps = max(epsilon_min, epsilon)

print("Testing...")
# Test after training: follow the greedy policy and render each step.
done = False
state = env.reset()
total_reward = 0
while not done:
    env.render()
    action = np.argmax(Q[state])
    state, reward, done, info = env.step(action)
    print(reward)
    time.sleep(1 / 24)
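This script is the 4x4 FrozenLake-v0 variant of the 8x8 script above; aside from the environment, only the decay rates and gamma differ, and the decay choice matters more than it looks. Since epsilon after n steps is max(epsilon_min, epsilon_decay^n), the floor is reached at:

n_{\text{floor}} = \frac{\ln \epsilon_{\min}}{\ln d} \approx \frac{\ln 0.01}{\ln 0.99} \approx 458 \quad\text{vs.}\quad \frac{\ln 0.01}{\ln 0.9995} \approx 9208

So this 4x4 run is almost entirely greedy after roughly step 458, while the 8x8 run above keeps exploring until roughly step 9208 of the same 10000-step budget, which suits its much larger state space.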
keras-rl-cartpole_dqn_example.ipynb (Google Colab notebook, Python 3 kernel, GPU runtime). The raw notebook JSON is reproduced here as its sequence of code cells and their captured outputs.

# Cell 1: pin the TensorFlow 1.x runtime and install dependencies.
%tensorflow_version 1.x
! pip install tensorflow==1.14
! pip install keras-rl
! pip install h5py

Output (abridged): all requirements already satisfied — tensorflow 1.14.0, keras 2.2.5, keras-rl 0.4.2, h5py 2.8.0, numpy 1.17.5.

# Cell 2: imports.
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

# Cell 3: get the environment and extract the number of actions.
ENV_NAME = 'CartPole-v0'
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# Cell 4: next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

Output:
Model: "sequential_2"
Layer (type)                 Output Shape    Param #
flatten_2 (Flatten)          (None, 4)       0
dense_5 (Dense)              (None, 16)      80
activation_5 (Activation)    (None, 16)      0
dense_6 (Dense)              (None, 16)      272
activation_6 (Activation)    (None, 16)      0
dense_7 (Dense)              (None, 16)      272
activation_7 (Activation)    (None, 16)      0
dense_8 (Dense)              (None, 2)       34
activation_8 (Activation)    (None, 2)       0
Total params: 658
Trainable params: 658
Non-trainable params: 0

# Cell 5: configure and compile the agent. Any built-in Keras optimizer and metrics can be used.
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=0.01, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Cell 6: now it's time to learn something. Visualization is off because it slows
# training down considerably; training can always be safely aborted with Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

Output:
Training for 50000 steps ...
Interval 1 (0 steps performed)
/usr/local/lib/python3.6/dist-packages/rl/memory.py:39: UserWarning: Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!
10000/10000 [==============================] - 37s 4ms/step - reward: 1.0000
146 episodes - episode_reward: 67.425 [8.000, 200.000] - loss: 1.457 - mean_absolute_error: 18.473 - mean_q: 37.124
Interval 2 (10000 steps performed)
10000/10000 [==============================] - 35s 4ms/step - reward: 1.0000
61 episodes - episode_reward: 164.639 [135.000, 200.000] - loss: 1.972 - mean_absolute_error: 37.012 - mean_q: 74.419
Interval 3 (20000 steps performed)
10000/10000 [==============================] - 34s 3ms/step - reward: 1.0000
53 episodes - episode_reward: 189.302 [100.000, 200.000] - loss: 1.555 - mean_absolute_error: 35.906 - mean_q: 71.968
Interval 4 (30000 steps performed)
10000/10000 [==============================] - 34s 3ms/step - reward: 1.0000
54 episodes - episode_reward: 184.648 [12.000, 200.000] - loss: 3.483 - mean_absolute_error: 34.904 - mean_q: 69.962
Interval 5 (40000 steps performed)
10000/10000 [==============================] - 34s 3ms/step - reward: 1.0000
done, took 174.966 seconds
<keras.callbacks.History at 0x7f31a069e780>

# Cell 7: after training is done, save the final weights and download them from Colab.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
from google.colab import files
files.download('dqn_{}_weights.h5f'.format(ENV_NAME))

# Cell 8: finally, evaluate the algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=False)

Output:
Testing for 5 episodes ...
Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
<keras.callbacks.History at 0x7f319cd930f0>
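A note on the UserWarning in the training log above: keras-rl samples a minibatch from replay memory as soon as the warm-up phase ends, and with nb_steps_warmup=10 the buffer holds fewer transitions than one batch (DQNAgent's default batch_size is 32) when updates begin, so the first batches are drawn with replacement. Raising the warm-up, e.g. nb_steps_warmup=50, should silence the warning; this is a suggested tweak, not something the notebook itself does.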
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

ENV_NAME = 'MountainCar-v0'

# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Configure and compile the agent. Any built-in Keras optimizer and metrics can be used.
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=0.01,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Load previously trained weights and run evaluation episodes with rendering.
dqn.load_weights('dqn_MountainCar-v0_weights.h5f')
dqn.test(env, nb_episodes=15, visualize=True)
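As with the CartPole script at the top, this evaluation calls dqn.test(..., visualize=True), which renders through env.render() and therefore needs a local display. On a headless machine or in Colab it will fail unless a virtual display is set up first (e.g. pyvirtualdisplay backed by Xvfb — an assumption about the runtime, not something used in this gist).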
(Two additional files in this gist are too large to display here.)