serser/maze_policy.py

## maze_policy.py
# https://classroom.udacity.com/courses/cs373/lessons/48646841/concepts/485327600923
# ----------
# User Instructions:
#
# Write a function optimum_policy that returns
# a grid which shows the optimum policy for robot
# motion. This means there should be an optimum
# direction associated with each navigable cell from
# which the goal can be reached.
#
# Unnavigable cells as well as cells from which
# the goal cannot be reached should have a string
# containing a single space (' '), as shown in the
# previous video. The goal cell should have '*'.
# ----------

grid = [[0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0]]
init = [0, 0]
goal = [len(grid)-1, len(grid[0])-1]
cost = 1 # the cost associated with moving from a cell to an adjacent one

delta = [[-1, 0 ], # go up
         [ 0, -1], # go left
         [ 1, 0 ], # go down
         [ 0, 1 ]] # go right

delta_name = ['^', '<', 'v', '>']

def optimum_policy(grid,goal,cost):
    # ----------------------------------------
    # modify code below
    # ----------------------------------------
    value = [[99 for row in range(len(grid[0]))] for col in range(len(grid))]
    policy = [[' ' for row in range(len(grid[0]))] for col in range(len(grid))]
    change = True

    while change:
        change = False

        for x in range(len(grid)):
            for y in range(len(grid[0])):
                if goal[0] == x and goal[1] == y:
                    # setting the goal
                    if value[x][y] > 0:
                        value[x][y] = 0
                        policy[x][y] = '*'
                        change = True
                # visitable
                elif grid[x][y] == 0:
                    for a in range(len(delta)):
                        # for each grid, get next move
                        x2 = x + delta[a][0]
                        y2 = y + delta[a][1]
                        # within the range and reachable
                        if x2 >= 0 and x2 < len(grid) and y2 >= 0 and y2 < len(grid[0]) and grid[x2][y2] == 0:
                            v2 = value[x2][y2] + cost
                            # found better approach
                            if v2 < value[x][y]:
                                change = True
                                value[x][y] = v2
                                policy[x][y] = delta_name[a]

    return policy

for row in optimum_policy(grid,goal,cost):
    print row
	# https://classroom.udacity.com/courses/cs373/lessons/48646841/concepts/485327600923
	# ----------
	# User Instructions:
	#
	# Write a function optimum_policy that returns
	# a grid which shows the optimum policy for robot
	# motion. This means there should be an optimum
	# direction associated with each navigable cell from
	# which the goal can be reached.
	#
	# Unnavigable cells as well as cells from which
	# the goal cannot be reached should have a string
	# containing a single space (' '), as shown in the
	# previous video. The goal cell should have '*'.
	# ----------

	grid = [[0, 1, 0, 0, 0, 0],
	[0, 1, 0, 0, 0, 0],
	[0, 1, 0, 0, 0, 0],
	[0, 1, 0, 0, 0, 0],
	[0, 0, 0, 0, 1, 0]]
	init = [0, 0]
	goal = [len(grid)-1, len(grid[0])-1]
	cost = 1 # the cost associated with moving from a cell to an adjacent one

	delta = [[-1, 0 ], # go up
	[ 0, -1], # go left
	[ 1, 0 ], # go down
	[ 0, 1 ]] # go right

	delta_name = ['^', '<', 'v', '>']

	def optimum_policy(grid,goal,cost):
	# ----------------------------------------
	# modify code below
	# ----------------------------------------
	value = [[99 for row in range(len(grid[0]))] for col in range(len(grid))]
	policy = [[' ' for row in range(len(grid[0]))] for col in range(len(grid))]
	change = True

	while change:
	change = False

	for x in range(len(grid)):
	for y in range(len(grid[0])):
	if goal[0] == x and goal[1] == y:
	# setting the goal
	if value[x][y] > 0:
	value[x][y] = 0
	policy[x][y] = '*'
	change = True
	# visitable
	elif grid[x][y] == 0:
	for a in range(len(delta)):
	# for each grid, get next move
	x2 = x + delta[a][0]
	y2 = y + delta[a][1]
	# within the range and reachable
	if x2 >= 0 and x2 < len(grid) and y2 >= 0 and y2 < len(grid[0]) and grid[x2][y2] == 0:
	v2 = value[x2][y2] + cost
	# found better approach
	if v2 < value[x][y]:
	change = True
	value[x][y] = v2
	policy[x][y] = delta_name[a]

	return policy

	for row in optimum_policy(grid,goal,cost):
	print row