Skip to content

Instantly share code, notes, and snippets.

@Dviejopomata
Created May 6, 2019 11:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Dviejopomata/9c9b4b083f6fe1a558b1f1fbbd296d37 to your computer and use it in GitHub Desktop.
Save Dviejopomata/9c9b4b083f6fe1a558b1f1fbbd296d37 to your computer and use it in GitHub Desktop.
import itertools
import random
import numpy as np
class Game:
    """Three-cell, one-dimensional board on which a player walks to a reward.

    Cell values stored in ``state``:
        0 - empty cell
        1 - player
        2 - reward

    Actions listed in ``actions``:
        0 - move left
        1 - move right
    """

    # Fixed numbering of the six boards that contain an empty cell, the
    # player and the reward.  The number is the row index of that board in
    # the Q table.
    _STATE_IDS = {
        (1, 0, 2): 0,
        (0, 1, 2): 1,
        (0, 2, 1): 2,
        (1, 2, 0): 3,
        (2, 1, 0): 4,
        (2, 0, 1): 5,
    }

    def __init__(self):
        """Create a game with the three cell values in random order."""
        self.state = random.sample([0, 1, 2], 3)
        self.actions = [0, 1]

    def step(self, action):
        """Apply one action to the game.

        :param action: 1 moves the player right; any other value moves it left.
        :return: tuple ``(state, reward, done, info)``.  ``state`` is the
            game's own list (mutated in place, not a copy), ``reward`` is
            10 when the player lands on the reward, -10 when the move would
            leave the board and 0 otherwise, ``done`` is True only when the
            reward was reached, and ``info`` is always an empty dict.
        :raises ValueError: if the board no longer holds both a player and a
            reward (``list.index`` fails), e.g. after the reward was taken.
        """
        distance = 1 if action == 1 else -1
        player_idx = self.state.index(1)
        reward_idx = self.state.index(2)
        new_idx = player_idx + distance

        if not 0 <= new_idx < len(self.state):
            # The move would leave the board: penalise it and keep the
            # board exactly as it was.
            return self.state, -10, False, {}

        self.state[player_idx] = 0
        # Landing on the reward overwrites its cell, so a finished board no
        # longer contains the value 2.
        self.state[new_idx] = 1
        done = new_idx == reward_idx
        reward = 10 if done else 0
        return self.state, reward, done, {}

    def reset(self):
        """Reshuffle the board into a new random order.

        :return: the new state list.
        """
        self.state = random.sample([0, 1, 2], 3)
        return self.state

    def state_to_number(self):
        """Map the current board to its row index in the Q table.

        :return: the index as a plain ``int`` in ``range(6)`` (not wrapped in
            a list), so it can be used directly as an array or list index.
        :raises ValueError: if the current board is not one of the six
            numbered layouts, e.g. the finished board left behind once the
            player has taken the reward.
        """
        state_id = self._STATE_IDS.get(tuple(self.state))
        # 0 is a valid index, so test against None rather than truthiness.
        if state_id is None:
            raise ValueError(
                "state {!r} has no Q-table index".format(self.state)
            )
        return state_id
# Environment instance and Q-learning hyper-parameters.
game = Game()
LR = 0.01  # step size applied to every non-terminal Q-value update
GAMMA = 0.9  # discount on the best Q-value of the following state
NUM_EPISODES = 1000
epsilon = 0.8  # a random action is taken unless a uniform draw falls below 1 - epsilon

# Number of 2-element combinations of range(4), which is 6.  It was only
# used as the row count of an alternative, randomly initialised table
# (np.random.uniform between -1 and 1) that the hand-written table replaces.
comb_states = len(list(itertools.combinations(range(4), 2)))

# Hand-written starting Q table: one row per state id, one column per action
# (column 0 = left, column 1 = right).
q_table = np.array(
    [
        [10, -1],  # state 0 [1,0,2]
        [10, -1],  # state 1 [0,1,2]
        [-1, 1],  # state 2 [0,2,1]
        [-1, 1],  # state 3 [1,2,0]
        [-1, 1],  # state 4 [2,1,0]
        [-1, 1],  # state 5 [2,0,1]
    ],
    dtype="float64",
)
print(q_table)

for _episode in range(NUM_EPISODES):
    state = game.reset()
    state_id = game.state_to_number()
    while True:
        # Draw once per step; exploit only when the draw is below 1 - epsilon.
        explore = np.random.random() >= 1 - epsilon
        if explore:
            action = np.random.randint(0, len(game.actions))
        else:
            action = np.argmax(q_table[state_id])

        next_state, reward, done, info = game.step(action)

        if done:
            # Terminal move: store the reward itself and end the episode.
            q_table[state_id, action] = reward
            break

        # Non-terminal move: nudge the entry towards the discounted target.
        next_state_id = game.state_to_number()
        td_target = reward + GAMMA * np.max(q_table[next_state_id])
        q_table[state_id, action] += LR * (td_target - q_table[state_id, action])
        state_id = next_state_id

print(q_table)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment