Skip to content

Instantly share code, notes, and snippets.

@ka9e
Created January 31, 2015 10:35
Show Gist options
  • Save ka9e/2432657aaddfed7c9633 to your computer and use it in GitHub Desktop.
Save ka9e/2432657aaddfed7c9633 to your computer and use it in GitHub Desktop.
Seeking the goal of a maze with temporal-difference (TD) learning and an ε-greedy strategy
from random import random, choice, seed
# Maze layout: 1 marks a wall, 0 marks an open cell. Cells are addressed
# as (y, x), i.e. MAZE[y][x].
#
# Alternative smaller maze (use together with the alternative START/GOAL
# given below):
# (1, 1, 1, 1, 1, 1, 1),
# (1, 0, 0, 0, 1, 1, 1),
# (1, 0, 1, 0, 0, 0, 1),
# (1, 0, 1, 1, 0, 1, 1),
# (1, 0, 1, 1, 0, 1, 1),
# (1, 0, 0, 0, 0, 0, 1),
# (1, 1, 1, 1, 1, 1, 1),
MAZE = (
    (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
    (1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
    (1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1),
    (1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1),
    (1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1),
    (1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1),
    (1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1),
    (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
)

# Grid dimensions: number of rows and number of columns.
LENY = len(MAZE)
LENX = len(MAZE[0])

EPS = 0.5     # probability of taking a random (exploratory) step
GAMMA = 0.9   # discount factor applied to the next state's value
ALPHA = 0.5   # learning rate of the value update
REWARD = 255  # value assigned to the goal cell

# Start and goal positions as (y, x).
START = (1, 1)
GOAL = (6, 9)
# Alternative positions for the smaller maze:
# START = (4, 1)
# GOAL = (3, 4)

# State-value table with the same shape as MAZE: every cell starts at 0
# except the goal cell, which holds REWARD.
V = [
    [REWARD if (row, col) == GOAL else 0 for col in range(LENX)]
    for row in range(LENY)
]
def seek_ways(st, maze=None):
    """Return the open cells directly adjacent to ``st``.

    Args:
        st: Position as a ``(y, x)`` tuple.
        maze: Optional grid (sequence of rows) in which ``0`` marks an open
            cell. Defaults to the module-level ``MAZE``.

    Returns:
        A list of ``(y, x)`` tuples for the open neighbours, in the order
        up, down, left, right. Neighbours that fall outside the grid are
        skipped.
    """
    grid = MAZE if maze is None else maze
    y, x = st
    candidates = ((y - 1, x), (y + 1, x), (y, x - 1), (y, x + 1))
    # Explicit bounds check: without it a negative index would silently wrap
    # around to the opposite edge of the grid, and an index past the end
    # would raise IndexError for cells on the border.
    return [
        (ny, nx)
        for ny, nx in candidates
        if 0 <= ny < len(grid)
        and 0 <= nx < len(grid[ny])
        and grid[ny][nx] == 0
    ]
def next_state(st):
    """Pick the next position from ``st`` using an epsilon-greedy policy.

    With probability ``EPS`` a uniformly random open neighbour is chosen;
    otherwise one of the neighbours with the highest value in ``V`` is
    chosen, with ties broken uniformly at random.

    Args:
        st: Current position as a ``(y, x)`` tuple.

    Returns:
        The chosen neighbouring position as a ``(y, x)`` tuple.

    Raises:
        IndexError: If ``st`` has no open neighbour to move to.
    """
    neighbours = seek_ways(st)
    if not neighbours:
        raise IndexError("no open neighbour to move to from %r" % (st,))

    if random() > EPS:
        # Greedy step: compare against the actual maximum of the neighbour
        # values rather than a fixed baseline of 0, so the selection also
        # works if values ever become negative.
        best = max(V[y][x] for y, x in neighbours)
        candidates = [(y, x) for y, x in neighbours if V[y][x] == best]
        return choice(candidates)

    # Exploratory step: any open neighbour, uniformly at random.
    return choice(neighbours)
def update():
    """Run one episode from ``START`` to ``GOAL``, updating ``V`` in place.

    At each step the next position is obtained from ``next_state`` and the
    value of the current cell is moved towards a target by a fraction
    ``ALPHA``. The target is ``REWARD`` when the next position is the goal,
    and ``GAMMA`` times the next cell's value otherwise. The episode ends
    as soon as the step into the goal has been processed.
    """
    current = START
    while True:
        following = next_state(current)
        next_row, next_col = following
        next_value = V[next_row][next_col]
        row, col = current

        reached_goal = following == GOAL
        target = REWARD if reached_goal else GAMMA * next_value
        V[row][col] += ALPHA * (target - V[row][col])

        if reached_goal:
            break
        current = following
if __name__ == '__main__':
    # Seed the random number generator, run the training episodes, then
    # dump the learned value table.
    seed()
    LOOP = 10000
    for _ in range(LOOP):
        update()

    # Write one row of V per line, values rounded to integers and
    # separated by tabs; no trailing newline is written.
    with open(file="result.txt", mode='w') as f:
        rows = ["\t".join(str(round(value)) for value in row) for row in V]
        f.write("\n".join(rows))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment