Skip to content

Instantly share code, notes, and snippets.

@serser
Created December 12, 2017 03:50
Show Gist options
  • Save serser/5efa25f8bef0c64cefeae9835495dc97 to your computer and use it in GitHub Desktop.
Save serser/5efa25f8bef0c64cefeae9835495dc97 to your computer and use it in GitHub Desktop.
# ----------
# User Instructions:
#
# Implement the function optimum_policy2D below.
#
# You are given a car in grid with initial state
# init. Your task is to compute and return the car's
# optimal path to the position specified in goal;
# the costs for each motion are as defined in cost.
#
# There are four motion directions: up, left, down, and right.
# Increasing the index in this array corresponds to making
# a left turn, and decreasing the index corresponds to making
# a right turn.
# Motion model: the index into `forward` is the car's heading.
# Adding 1 to the index (mod 4) is a left turn; subtracting 1 is a right turn.
forward = [
    [-1, 0],  # go up
    [0, -1],  # go left
    [1, 0],   # go down
    [0, 1],   # go right
]
forward_name = ['up', 'left', 'down', 'right']

# The three steering actions and their one-character path labels.
action = [-1, 0, 1]            # right turn, no turn, left turn
action_name = ['R', '#', 'L']
# EXAMPLE INPUTS:
# grid format:
# 0 = navigable space
# 1 = unnavigable space
# The world the car drives in: 0 = navigable cell, 1 = obstacle.
grid = [
    [1, 1, 1, 0, 0, 0],
    [1, 1, 1, 0, 1, 0],
    [0, 0, 0, 0, 0, 0],
    [1, 1, 1, 0, 1, 1],
    [1, 1, 1, 0, 1, 1],
]

# Start state [row, col, direction]; direction indexes `forward`
# (0 = up, 1 = left, 2 = down, 3 = right).
init = [4, 3, 0]

# Goal position [row, col]; any final heading is acceptable.
goal = [2, 0]

# Per-action costs: [right turn, no turn, left turn].
cost = [2, 1, 20]
# grid = [[0, 0, 0, 0, 1, 1],
# [0, 0, 1, 0, 0, 0],
# [0, 0, 0, 0, 1, 0],
# [0, 0, 1, 1, 1, 0],
# [0, 0, 0, 0, 1, 0]]
# init = [4, 5, 0]
# goal = [4, 3]
# cost = [1, 1, 1]
# EXAMPLE OUTPUT:
# calling optimum_policy2D with the given parameters should return
# [[' ', ' ', ' ', 'R', '#', 'R'],
# [' ', ' ', ' ', '#', ' ', '#'],
# ['*', '#', '#', '#', '#', 'R'],
# [' ', ' ', ' ', '#', ' ', ' '],
# [' ', ' ', ' ', '#', ' ', ' ']]
# ----------
# ----------------------------------------
# modify code below
# ----------------------------------------
def optimum_policy2D(grid, init, goal, cost,
                     forward=None, action=None, action_name=None):
    """Compute the optimal driving policy for a car with a heading.

    Runs dynamic-programming value iteration over the 3-D state space
    (row, col, heading), then drives the resulting policy forward from
    ``init`` to project the optimal path onto a 2-D grid of labels.

    Parameters:
        grid: 2-D list of int; 0 = navigable cell, 1 = obstacle.
        init: [row, col, direction] start state; ``direction`` indexes
            ``forward`` (0 = up, 1 = left, 2 = down, 3 = right).
        goal: [row, col] goal position (any final heading is accepted).
        cost: [right-turn, no-turn, left-turn] cost of each action.
        forward, action, action_name: optional motion-model overrides;
            they default to the standard model used by this exercise, so
            existing four-argument callers behave exactly as before.

    Returns:
        A 2-D list of str with 'R'/'#'/'L' along the optimal path, '*'
        at the goal and ' ' elsewhere, or the string 'fail' when the
        goal is unreachable from ``init``.
    """
    # Literal defaults keep the function self-contained (and avoid the
    # mutable-default-argument pitfall).
    if forward is None:
        forward = [[-1, 0], [0, -1], [1, 0], [0, 1]]
    if action is None:
        action = [-1, 0, 1]
    if action_name is None:
        action_name = ['R', '#', 'L']

    rows, cols = len(grid), len(grid[0])
    # value[o][x][y] = minimum cost-to-go from (x, y) while facing o.
    # 999 doubles as the "unreachable" sentinel, as in the exercise.
    value = [[[999 for _ in range(cols)] for _ in range(rows)]
             for _ in range(4)]
    # policy[o][x][y] = best action label to take in that 3-D state.
    policy = [[[' ' for _ in range(cols)] for _ in range(rows)]
              for _ in range(4)]
    policy2D = [[' ' for _ in range(cols)] for _ in range(rows)]

    # Value iteration: sweep all states until none improves.  This
    # replaces the previous hand-rolled priority-queue search, which
    # popped stale open-list entries without a closed check and could
    # overwrite an already-settled predecessor, corrupting backtracking.
    change = True
    while change:
        change = False
        for x in range(rows):
            for y in range(cols):
                for o in range(4):
                    if [x, y] == goal:
                        # Reaching the goal costs nothing, any heading.
                        if value[o][x][y] > 0:
                            value[o][x][y] = 0
                            policy[o][x][y] = '*'
                            change = True
                    elif grid[x][y] == 0:
                        # The car turns first, then moves one cell in
                        # its new heading.
                        for i in range(len(action)):
                            o2 = (o + action[i]) % 4
                            x2 = x + forward[o2][0]
                            y2 = y + forward[o2][1]
                            if (0 <= x2 < rows and 0 <= y2 < cols
                                    and grid[x2][y2] == 0):
                                v2 = value[o2][x2][y2] + cost[i]
                                if v2 < value[o][x][y]:
                                    value[o][x][y] = v2
                                    policy[o][x][y] = action_name[i]
                                    change = True

    # Project the 3-D policy onto 2-D by simulating the drive from init.
    x, y, o = init
    if value[o][x][y] >= 999:
        return 'fail'  # goal not reachable from the start state
    policy2D[x][y] = policy[o][x][y]
    while policy[o][x][y] != '*':
        if policy[o][x][y] == '#':
            o2 = o               # keep heading
        elif policy[o][x][y] == 'R':
            o2 = (o - 1) % 4     # right turn decreases the index
        else:                    # 'L'
            o2 = (o + 1) % 4     # left turn increases the index
        x += forward[o2][0]
        y += forward[o2][1]
        o = o2
        policy2D[x][y] = policy[o][x][y]
    return policy2D
# Print the optimal 2-D policy row by row.  Using print() as a function
# keeps this single-argument call valid on both Python 2 and Python 3
# (the old `print r` statement is a SyntaxError on Python 3).
for row in optimum_policy2D(grid, init, goal, cost):
    print(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment