ka9e/td_learn.py

## td_learn.py

from random import random, choice, seed

# Grid layout, indexed as MAZE[y][x]. Cells holding 0 are the ones
# seek_ways() treats as walkable; every other cell is never entered.
#
# Alternative, smaller 7x7 layout kept for reference (presumably meant to be
# used together with the alternative START/GOAL noted further down):
#     (1, 1, 1, 1, 1, 1, 1),
#     (1, 0, 0, 0, 1, 1, 1),
#     (1, 0, 1, 0, 0, 0, 1),
#     (1, 0, 1, 1, 0, 1, 1),
#     (1, 0, 1, 1, 0, 1, 1),
#     (1, 0, 0, 0, 0, 0, 1),
#     (1, 1, 1, 1, 1, 1, 1),
MAZE = (
    (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
    (1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
    (1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1),
    (1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1),
    (1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1),
    (1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1),
    (1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1),
    (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
)

# Grid dimensions: number of rows, and number of columns of the first row.
LENY = len(MAZE)
LENX = len(MAZE[0])

# Learning parameters.
EPS = 0.5     # next_state() moves at random unless random() exceeds this
GAMMA = 0.9   # factor applied to the next cell's value in update()
ALPHA = 0.5   # step size of each value correction in update()
REWARD = 255  # value pinned on the goal cell and used as target on arrival

# Episode endpoints, as (y, x).
START = (1, 1)
GOAL = (6, 9)

# Endpoints for the alternative 7x7 layout:
#     START = (4, 1)
#     GOAL = (3, 4)

# Value table with the same shape as the grid; each row is its own list.
# Everything starts at 0 except the goal cell, which holds REWARD.
V = [[0] * LENX for _ in range(LENY)]
V[GOAL[0]][GOAL[1]] = REWARD


def seek_ways(st):
    """Return the walkable cells orthogonally adjacent to ``st``.

    Args:
        st: ``(y, x)`` position in ``MAZE`` (row index first).

    Returns:
        A list of ``(y, x)`` tuples, ordered up, down, left, right, holding
        every neighbour that lies inside ``MAZE`` and whose cell value is 0.
        The list is empty when no such neighbour exists.
    """
    y, x = st
    out = []

    for ny, nx in ((y - 1, x), (y + 1, x), (y, x - 1), (y, x + 1)):
        # Reject coordinates outside the grid explicitly: a negative index
        # would silently wrap to the opposite edge, and an index past the
        # end would raise IndexError.
        if not (0 <= ny < len(MAZE) and 0 <= nx < len(MAZE[ny])):
            continue
        if MAZE[ny][nx] == 0:
            out.append((ny, nx))

    return out


def next_state(st):
    """Choose the cell to move to from ``st`` with an epsilon-greedy rule.

    A single ``random()`` draw selects the mode. If the draw exceeds ``EPS``
    the move is greedy: one of the neighbours sharing the highest value in
    ``V`` is picked uniformly. Otherwise any neighbour is picked uniformly.

    Args:
        st: current ``(y, x)`` position.

    Returns:
        The chosen neighbouring ``(y, x)`` position.

    Raises:
        IndexError: if ``seek_ways(st)`` yields no neighbour (``choice`` on
            an empty list).
    """
    if random() <= EPS:
        # Exploratory move: ignore the value table entirely.
        return choice(seek_ways(st))

    # Greedy move. The best value is taken from the neighbours themselves
    # rather than from a fixed floor of 0, so a neighbourhood whose values
    # are all negative still yields candidates. ``default=0`` only matters
    # for an empty neighbourhood, where ``choice`` then raises IndexError.
    ways = seek_ways(st)
    best = max((V[y][x] for y, x in ways), default=0)
    return choice([pos for pos in ways if V[pos[0]][pos[1]] == best])

def update():
    """Play one episode from ``START`` and adjust ``V`` after every move.

    Each step asks ``next_state`` for the following cell and then nudges the
    value of the cell being left by ``ALPHA`` times the gap to a target:
    ``REWARD`` when the move lands on ``GOAL`` (which also ends the episode),
    otherwise ``GAMMA`` times the value of the cell being entered.
    """
    cur = START

    while True:
        nxt = next_state(cur)

        ny, nx = nxt
        cy, cx = cur
        nxt_val = V[ny][nx]
        cur_val = V[cy][cx]

        if nxt == GOAL:
            # Arriving at the goal: pull toward the full reward and stop.
            V[cy][cx] += ALPHA * (REWARD - cur_val)
            return

        # Ordinary move: pull toward the discounted value of the next cell.
        V[cy][cx] += ALPHA * (GAMMA * nxt_val - cur_val)
        cur = nxt


if __name__ == '__main__':
    # Seed the generator from the default entropy source.
    seed()

    # Train for a fixed number of episodes.
    LOOP = 10000
    for i in range(LOOP):
        update()

    # Dump the value table: one grid row per line, values rounded to
    # integers and separated by tabs.
    with open(file="result.txt", mode='w') as f:
        rendered = "\n".join(
            "\t".join([str(round(cell)) for cell in row])
            for row in V
        )
        f.write(rendered)

	from random import random, choice, seed

	# Grid layout, indexed as MAZE[y][x]. Cells holding 0 are the ones
	# seek_ways() treats as walkable; every other cell is never entered.
	#
	# Alternative, smaller 7x7 layout kept for reference (presumably meant to
	# be used together with the alternative START/GOAL noted further down):
	#     (1, 1, 1, 1, 1, 1, 1),
	#     (1, 0, 0, 0, 1, 1, 1),
	#     (1, 0, 1, 0, 0, 0, 1),
	#     (1, 0, 1, 1, 0, 1, 1),
	#     (1, 0, 1, 1, 0, 1, 1),
	#     (1, 0, 0, 0, 0, 0, 1),
	#     (1, 1, 1, 1, 1, 1, 1),
	MAZE = (
		(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
		(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
		(1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1),
		(1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1),
		(1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1),
		(1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1),
		(1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1),
		(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
	)

	# Grid dimensions: number of rows, and number of columns of the first row.
	LENY = len(MAZE)
	LENX = len(MAZE[0])

	# Learning parameters.
	EPS = 0.5     # next_state() moves at random unless random() exceeds this
	GAMMA = 0.9   # factor applied to the next cell's value in update()
	ALPHA = 0.5   # step size of each value correction in update()
	REWARD = 255  # value pinned on the goal cell and used as target on arrival

	# Episode endpoints, as (y, x).
	START = (1, 1)
	GOAL = (6, 9)

	# Endpoints for the alternative 7x7 layout:
	#     START = (4, 1)
	#     GOAL = (3, 4)

	# Value table with the same shape as the grid; each row is its own list.
	# Everything starts at 0 except the goal cell, which holds REWARD.
	V = [[0] * LENX for _ in range(LENY)]
	V[GOAL[0]][GOAL[1]] = REWARD


	def seek_ways(st):
		"""Return the walkable cells orthogonally adjacent to ``st``.

		Args:
			st: ``(y, x)`` position in ``MAZE`` (row index first).

		Returns:
			A list of ``(y, x)`` tuples, ordered up, down, left, right,
			holding every neighbour that lies inside ``MAZE`` and whose cell
			value is 0. The list is empty when no such neighbour exists.
		"""
		y, x = st
		out = []

		for ny, nx in ((y - 1, x), (y + 1, x), (y, x - 1), (y, x + 1)):
			# Reject coordinates outside the grid explicitly: a negative
			# index would silently wrap to the opposite edge, and an index
			# past the end would raise IndexError.
			if not (0 <= ny < len(MAZE) and 0 <= nx < len(MAZE[ny])):
				continue
			if MAZE[ny][nx] == 0:
				out.append((ny, nx))

		return out


	def next_state(st):
		"""Choose the cell to move to from ``st`` with an epsilon-greedy rule.

		A single ``random()`` draw selects the mode. If the draw exceeds
		``EPS`` the move is greedy: one of the neighbours sharing the highest
		value in ``V`` is picked uniformly. Otherwise any neighbour is picked
		uniformly.

		Args:
			st: current ``(y, x)`` position.

		Returns:
			The chosen neighbouring ``(y, x)`` position.

		Raises:
			IndexError: if ``seek_ways(st)`` yields no neighbour (``choice``
				on an empty list).
		"""
		if random() <= EPS:
			# Exploratory move: ignore the value table entirely.
			return choice(seek_ways(st))

		# Greedy move. The best value is taken from the neighbours themselves
		# rather than from a fixed floor of 0, so a neighbourhood whose values
		# are all negative still yields candidates. ``default=0`` only matters
		# for an empty neighbourhood, where ``choice`` then raises IndexError.
		ways = seek_ways(st)
		best = max((V[y][x] for y, x in ways), default=0)
		return choice([pos for pos in ways if V[pos[0]][pos[1]] == best])

	def update():
		"""Play one episode from ``START`` and adjust ``V`` after every move.

		Each step asks ``next_state`` for the following cell and then nudges
		the value of the cell being left by ``ALPHA`` times the gap to a
		target: ``REWARD`` when the move lands on ``GOAL`` (which also ends
		the episode), otherwise ``GAMMA`` times the value of the cell being
		entered.
		"""
		st = START

		while True:
			st_n = next_state(st)

			yn, xn = st_n
			val_n = V[yn][xn]

			y, x = st
			val = V[y][x]

			if st_n == GOAL:
				# Arriving at the goal: pull toward the full reward and stop.
				V[y][x] += ALPHA * (REWARD - val)
				break

			# Ordinary move: pull toward the discounted value of the next cell.
			V[y][x] += ALPHA * (GAMMA * val_n - val)

			st = st_n


	if __name__ == '__main__':
		# Seed the generator from the default entropy source.
		seed()

		# Train for a fixed number of episodes.
		LOOP = 10000
		for i in range(LOOP):
			update()

		# Dump the value table: one grid row per line, values rounded to
		# integers and separated by tabs.
		with open(file="result.txt", mode='w') as f:
			f.write("\n".join("\t".join([str(round(x)) for x in line]) for line in V))