@Ronnasayd
Created October 13, 2020 20:50
Algorithms and implementations for various neural networks
import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory
ENV_NAME = 'CartPole-v0'
# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n
# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())
# Finally, we configure and compile our agent. You can use any built-in Keras optimizer and metrics.
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=0.01,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.load_weights('dqn_CartPole-v0_weights.h5f')
dqn.test(env, nb_episodes=15, visualize=True, verbose=2)
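The weight file loaded above is not produced anywhere in this gist. A minimal training sketch, assuming the standard keras-rl DQNAgent.fit/save_weights API and an arbitrary step budget, would run before load_weights/test:
# Hedged sketch (not part of the original gist): train the agent and write the
# weight file that dqn.load_weights() expects. nb_steps=50000 is an assumed budget.
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)
dqn.save_weights('dqn_CartPole-v0_weights.h5f', overwrite=True)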
import gym
import time
import numpy as np
import random
env = gym.make('FrozenLake8x8-v0')  # select the environment
# Build the Q-table: rows are the possible states, columns are the actions.
Q = np.ones((env.observation_space.n, env.action_space.n), dtype=np.float32)
epsilon = 1  # initial exploration rate
epsilon_decay = 0.9995
epsilon_min = 0.01
alpha = 1  # initial learning rate
alpha_decay = 0.9995
alpha_min = 0.01  # minimum learning rate
gamma = 0.9  # discount factor for rewards
MAX_NUMBER_STEPS = 10000
steps = 0
eta = max(alpha_min, alpha)
eps = max(epsilon_min, epsilon)
total_reward = 0
state = env.reset()  # initialize the environment
while steps < MAX_NUMBER_STEPS:
    if random.uniform(0, 1) < eps:  # if a random number is smaller than epsilon
        action = env.action_space.sample()  # choose a random action (exploration)
    else:
        # choose the action with the highest estimated return for this state (exploitation)
        action = np.argmax(Q[state])
    # execute the action and receive a new observation from the environment
    new_state, reward, done, info = env.step(action)
    total_reward += reward
    if done:
        target = reward
    else:
        target = reward + gamma * np.max(Q[new_state])
    # update the Q-table with the reward received for the action
    Q[state, action] = (1 - eta) * Q[state, action] + eta * target
    state = new_state  # update the state
    if done:
        print(
            f'steps: {steps} total_reward: {total_reward} alpha: {eta:.4f} epsilon: {eps:.4f}'
        )
        total_reward = 0
        state = env.reset()  # reinitialize the environment
    steps += 1
    alpha = alpha * alpha_decay
    eta = max(alpha_min, alpha)  # decay the learning rate
    epsilon = epsilon * epsilon_decay
    eps = max(epsilon_min, epsilon)  # decay the exploration rate
print("Testing...")
# test after training
done = False
state = env.reset()
total_reward = 0
while not done:
    env.render()
    action = np.argmax(Q[state])
    state, reward, done, info = env.step(action)
    print(reward)
    time.sleep(1 / 24)
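The update inside the training loop above is the standard tabular Q-learning rule, with eta playing the role of the learning rate $\alpha$ and gamma the discount factor:

$$Q(s, a) \leftarrow (1 - \alpha)\,Q(s, a) + \alpha\,\bigl(r + \gamma \max_{a'} Q(s', a')\bigr)$$

with the target reduced to just $r$ when the episode terminates at $s'$.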
import gym
import time
import numpy as np
import random
env = gym.make('FrozenLake-v0')  # select the environment
# Build the Q-table: rows are the possible states, columns are the actions.
Q = np.ones((env.observation_space.n, env.action_space.n), dtype=np.float32)
epsilon = 1  # initial exploration rate
epsilon_decay = 0.99
epsilon_min = 0.01
alpha = 1  # initial learning rate
alpha_decay = 0.999
alpha_min = 0.01  # minimum learning rate
gamma = 0.9999  # discount factor for rewards
MAX_NUMBER_STEPS = 10000
steps = 0
eta = max(alpha_min, alpha)
eps = max(epsilon_min, epsilon)
total_reward = 0
state = env.reset()  # initialize the environment
while steps < MAX_NUMBER_STEPS:
    if random.uniform(0, 1) < eps:  # if a random number is smaller than epsilon
        action = env.action_space.sample()  # choose a random action (exploration)
    else:
        # choose the action with the highest estimated return for this state (exploitation)
        action = np.argmax(Q[state])
    # execute the action and receive a new observation from the environment
    new_state, reward, done, info = env.step(action)
    total_reward += reward
    if done:
        target = reward
    else:
        target = reward + gamma * np.max(Q[new_state])
    # update the Q-table with the reward received for the action
    Q[state, action] = (1 - eta) * Q[state, action] + eta * target
    state = new_state  # update the state
    if done:
        print(f'total_reward: {total_reward} steps: {steps} alpha: {eta} epsilon: {eps}')
        total_reward = 0
        state = env.reset()  # reinitialize the environment
    steps += 1
    alpha = alpha * alpha_decay
    eta = max(alpha_min, alpha)  # decay the learning rate
    epsilon = epsilon * epsilon_decay
    eps = max(epsilon_min, epsilon)  # decay the exploration rate
print("Testing...")
# test after training
done = False
state = env.reset()
total_reward = 0
while not done:
    env.render()
    action = np.argmax(Q[state])
    state, reward, done, info = env.step(action)
    print(reward)
    time.sleep(1 / 24)
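Because FrozenLake is slippery, a single rendered episode says little about the learned policy. A minimal evaluation sketch, assuming the same old-style gym API used above (env.step returning a 4-tuple) and an arbitrary episode count, measures the greedy policy's success rate:
# Hedged sketch (not in the original gist): success rate of the greedy policy.
n_eval = 1000  # assumed number of evaluation episodes
successes = 0
for _ in range(n_eval):
    s = env.reset()
    done = False
    while not done:
        s, r, done, _ = env.step(np.argmax(Q[s]))
    successes += r  # FrozenLake gives reward 1 only when the goal is reached
print(f'greedy policy success rate: {successes / n_eval:.2%}')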
import numpy as np
import gym
import time
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory
ENV_NAME = 'MountainCar-v0'
# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n
# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())
# Finally, we configure and compile our agent. You can use any built-in Keras optimizer and metrics.
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               target_model_update=0.01,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.load_weights('dqn_MountainCar-v0_weights.h5f')
dqn.test(env, nb_episodes=15, visualize=True)
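As in the CartPole script, the MountainCar weight file is loaded but never created here. A hedged sketch of how it could be produced with the same keras-rl API follows; the step budget is an assumption, and MountainCar's sparse reward usually needs far more training than CartPole:
# Hedged sketch (not part of the original gist); nb_steps is an assumed budget.
dqn.fit(env, nb_steps=200000, visualize=False, verbose=2)
dqn.save_weights('dqn_MountainCar-v0_weights.h5f', overwrite=True)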